Example #1
def _find_gridrad_file_for_date(top_gridrad_dir_name, spc_date_string):
    """Tries to find one GridRad file for given SPC date.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param spc_date_string: SPC date or convective day (format "yyyymmdd").
    :return: gridrad_file_name: Path to GridRad file.  If no files were found
        for the given SPC date, returns None.
    """

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(spc_date_string)
    all_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC,
        include_endpoint=True)

    for this_time_unix_sec in all_times_unix_sec:
        this_gridrad_file_name = gridrad_io.find_file(
            unix_time_sec=this_time_unix_sec,
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False)

        if os.path.isfile(this_gridrad_file_name):
            return this_gridrad_file_name

    return None
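
Every snippet on this page leans on the same contract: `time_periods.range_and_interval_to_list` returns evenly spaced Unix times from start to end. As a rough mental model only (this is a hypothetical stand-in, not the library's actual implementation), it behaves like an inclusive `numpy.arange`:

import numpy

def _range_and_interval_to_list_sketch(
        start_time_unix_sec, end_time_unix_sec, time_interval_sec,
        include_endpoint=True):
    """Hypothetical stand-in for time_periods.range_and_interval_to_list."""
    stop_time_unix_sec = end_time_unix_sec + (
        time_interval_sec if include_endpoint else 0)
    return numpy.arange(
        start_time_unix_sec, stop_time_unix_sec, time_interval_sec, dtype=int)

print(_range_and_interval_to_list_sketch(0, 1800, 600))
# [   0  600 1200 1800]
print(_range_and_interval_to_list_sketch(0, 1800, 600, include_endpoint=False))
# [   0  600 1200]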
Example #2
def _download_rap_analyses(first_init_time_string, last_init_time_string,
                           top_local_directory_name):
    """Downloads zero-hour analyses from the RAP (Rapid Refresh) model.

    :param first_init_time_string: See documentation at top of file.
    :param last_init_time_string: Same.
    :param top_local_directory_name: Same.
    """

    first_init_time_unix_sec = time_conversion.string_to_unix_sec(
        first_init_time_string, INPUT_TIME_FORMAT)
    last_init_time_unix_sec = time_conversion.string_to_unix_sec(
        last_init_time_string, INPUT_TIME_FORMAT)
    time_interval_sec = HOURS_TO_SECONDS * nwp_model_utils.get_time_steps(
        nwp_model_utils.RAP_MODEL_NAME)[1]

    init_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_init_time_unix_sec,
        end_time_unix_sec=last_init_time_unix_sec,
        time_interval_sec=time_interval_sec)
    init_time_strings = [
        time_conversion.unix_sec_to_string(t, DEFAULT_TIME_FORMAT)
        for t in init_times_unix_sec]

    num_init_times = len(init_times_unix_sec)
    local_file_names = [None] * num_init_times

    for i in range(num_init_times):
        local_file_names[i] = nwp_model_io.find_rap_file_any_grid(
            top_directory_name=top_local_directory_name,
            init_time_unix_sec=init_times_unix_sec[i], lead_time_hours=0,
            raise_error_if_missing=False)
        if local_file_names[i] is not None:
            continue

        local_file_names[i] = nwp_model_io.download_rap_file_any_grid(
            top_local_directory_name=top_local_directory_name,
            init_time_unix_sec=init_times_unix_sec[i], lead_time_hours=0,
            raise_error_if_fails=False)

        if local_file_names[i] is None:
            print('\nPROBLEM.  Download failed for {0:s}.\n\n'.format(
                init_time_strings[i]))
        else:
            print('\nSUCCESS.  File was downloaded to "{0:s}".\n\n'.format(
                local_file_names[i]))

        time.sleep(SECONDS_TO_PAUSE_BETWEEN_FILES)

    num_downloaded = numpy.sum(numpy.array(
        [f is not None for f in local_file_names]))
    print('{0:d} of {1:d} files were downloaded successfully!'.format(
        num_downloaded, num_init_times))
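
As an aside, the success count needs no numpy; a plain-Python equivalent with toy stand-in values (the file names here are invented for illustration):

# Toy stand-in values; in the script above, local_file_names is built by the
# download loop.
local_file_names = ['rap_file_0.grb2', None, 'rap_file_2.grb2']
num_init_times = len(local_file_names)

num_downloaded = sum(f is not None for f in local_file_names)
print('{0:d} of {1:d} files were downloaded successfully!'.format(
    num_downloaded, num_init_times))
# 2 of 3 files were downloaded successfully!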
Example #3
    def test_range_and_interval_to_list_exclude_endpoint(self):
        """Ensures correct output from range_and_interval_to_list.

        In this case, endpoint of period is excluded from list of exact times.
        """

        these_times_unix_sec = time_periods.range_and_interval_to_list(
            start_time_unix_sec=ESTIMATED_START_TIME_UNIX_SEC,
            end_time_unix_sec=ESTIMATED_END_TIME_UNIX_SEC,
            time_interval_sec=TIME_INTERVAL_SEC,
            include_endpoint=False)
        self.assertTrue(
            numpy.array_equal(these_times_unix_sec,
                              TIMES_WITHOUT_ENDPOINT_UNIX_SEC))
Example #4
def find_processed_hourly_files(start_time_unix_sec=None,
                                end_time_unix_sec=None,
                                primary_source=None,
                                secondary_source=None,
                                top_directory_name=None,
                                raise_error_if_missing=True):
    """Finds processed hourly wind files on local machine.

    N = number of hours in time period (start_time_unix_sec...end_time_unix_sec)

    :param start_time_unix_sec: Beginning of time period.
    :param end_time_unix_sec: End of time period.
    :param primary_source: String ID for primary data source.
    :param secondary_source: String ID for secondary data source.
    :param top_directory_name: Name of top-level directory with processed wind
        files.
    :param raise_error_if_missing: Boolean flag.  If True and *any* file is
        missing, this method will raise an error.
    :return: processed_file_names: length-N list of paths to processed files.
    :return: hours_unix_sec: length-N numpy array of corresponding hours.
    """

    min_hour_unix_sec = int(
        rounder.floor_to_nearest(start_time_unix_sec, HOURS_TO_SECONDS))
    max_hour_unix_sec = int(
        rounder.floor_to_nearest(end_time_unix_sec, HOURS_TO_SECONDS))

    hours_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=min_hour_unix_sec,
        end_time_unix_sec=max_hour_unix_sec,
        time_interval_sec=HOURS_TO_SECONDS,
        include_endpoint=True)

    num_hours = len(hours_unix_sec)
    processed_file_names = [''] * num_hours

    for i in range(num_hours):
        processed_file_names[i] = find_processed_file(
            start_time_unix_sec=hours_unix_sec[i],
            end_time_unix_sec=hours_unix_sec[i] + HOURS_TO_SECONDS - 1,
            primary_source=primary_source,
            secondary_source=secondary_source,
            top_directory_name=top_directory_name,
            raise_error_if_missing=raise_error_if_missing)

    return processed_file_names, hours_unix_sec
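
Flooring both period endpoints to whole hours is what guarantees exactly one file per hour. A self-contained sketch, assuming `rounder.floor_to_nearest` rounds down to the nearest multiple of its second argument:

import numpy

HOURS_TO_SECONDS = 3600

def _floor_to_nearest_sketch(input_value, rounding_base):
    # Assumed behaviour of rounder.floor_to_nearest: round down to the
    # nearest multiple of rounding_base.
    return rounding_base * numpy.floor(float(input_value) / rounding_base)

# 01:30:00 and 03:15:00 (as seconds past midnight) floor to 01:00:00 and
# 03:00:00, so the method returns one file each for hours 01, 02, and 03.
print(int(_floor_to_nearest_sketch(5400, HOURS_TO_SECONDS)))   # 3600
print(int(_floor_to_nearest_sketch(11700, HOURS_TO_SECONDS)))  # 10800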
Example #5
def get_spc_dates_in_range(first_spc_date_string, last_spc_date_string):
    """Returns list of SPC dates in range.

    :param first_spc_date_string: First SPC date in range (format "yyyymmdd").
    :param last_spc_date_string: Last SPC date in range (format "yyyymmdd").
    :return: spc_date_strings: 1-D list of SPC dates (format "yyyymmdd").
    """

    first_spc_date_unix_sec = string_to_unix_sec(first_spc_date_string,
                                                 SPC_DATE_FORMAT)
    last_spc_date_unix_sec = string_to_unix_sec(last_spc_date_string,
                                                SPC_DATE_FORMAT)
    error_checking.assert_is_geq(last_spc_date_unix_sec,
                                 first_spc_date_unix_sec)

    spc_dates_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_spc_date_unix_sec,
        end_time_unix_sec=last_spc_date_unix_sec,
        time_interval_sec=DAYS_TO_SECONDS,
        include_endpoint=True)

    return [unix_sec_to_string(t, SPC_DATE_FORMAT) for t in spc_dates_unix_sec]
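
This works because an SPC date is a fixed 24-hour window. Assuming the usual convention (1200 UTC on the named date to just before 1200 UTC the next day), the start/end helpers can be sketched with the standard library:

import calendar
import time

SPC_DATE_FORMAT = '%Y%m%d'
DAYS_TO_SECONDS = 86400

def _get_start_of_spc_date_sketch(spc_date_string):
    # Assumed convention: the SPC date "yyyymmdd" starts at 1200 UTC.
    midnight_unix_sec = calendar.timegm(
        time.strptime(spc_date_string, SPC_DATE_FORMAT))
    return midnight_unix_sec + 43200

def _get_end_of_spc_date_sketch(spc_date_string):
    # ...and ends one second before 1200 UTC on the next day.
    return _get_start_of_spc_date_sketch(spc_date_string) + DAYS_TO_SECONDS - 1

print(_get_start_of_spc_date_sketch('20110404'))  # 1301918400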
Example #6
def _run(input_dir_name, first_init_time_string, last_init_time_string,
         output_dir_name):
    """Combines forecasts from different initial times.

    This is effectively the main method.

    :param input_dir_name: See documentation at top of file.
    :param first_init_time_string: Same.
    :param last_init_time_string: Same.
    :param output_dir_name: Same.
    """

    first_init_time_unix_sec = time_conversion.string_to_unix_sec(
        first_init_time_string, INPUT_TIME_FORMAT)
    last_init_time_unix_sec = time_conversion.string_to_unix_sec(
        last_init_time_string, INPUT_TIME_FORMAT)

    init_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_init_time_unix_sec,
        end_time_unix_sec=last_init_time_unix_sec,
        time_interval_sec=MAX_LEAD_TIME_SECONDS,
        include_endpoint=True)

    probability_matrix = None
    gridded_forecast_dict = None

    for this_time_unix_sec in init_times_unix_sec:
        this_file_name = prediction_io.find_gridded_file(
            directory_name=input_dir_name,
            first_init_time_unix_sec=this_time_unix_sec,
            last_init_time_unix_sec=this_time_unix_sec,
            raise_error_if_missing=True)

        print('Reading data from: "{0:s}"...'.format(this_file_name))
        gridded_forecast_dict = prediction_io.read_gridded_predictions(
            this_file_name)

        assert (gridded_forecast_dict[prediction_io.MIN_LEAD_TIME_KEY] ==
                MIN_LEAD_TIME_SECONDS)
        assert (gridded_forecast_dict[prediction_io.MAX_LEAD_TIME_KEY] ==
                MAX_LEAD_TIME_SECONDS)

        this_probability_matrix = gridded_forecast_dict[
            prediction_io.XY_PROBABILITIES_KEY][0]

        if not isinstance(this_probability_matrix, numpy.ndarray):
            this_probability_matrix = this_probability_matrix.toarray()

        if probability_matrix is None:
            probability_matrix = this_probability_matrix + 0.
        else:
            probability_matrix = numpy.stack(
                (probability_matrix, this_probability_matrix), axis=-1)
            probability_matrix = numpy.nanmax(probability_matrix, axis=-1)

        print(probability_matrix.shape)

    print('\n')

    for this_key in prediction_io.LATLNG_KEYS:
        if this_key in gridded_forecast_dict:
            gridded_forecast_dict.pop(this_key)

    gridded_forecast_dict[prediction_io.INIT_TIMES_KEY] = (
        init_times_unix_sec[[0]])
    gridded_forecast_dict[prediction_io.MAX_LEAD_TIME_KEY] = (
        init_times_unix_sec[-1] + MAX_LEAD_TIME_SECONDS -
        init_times_unix_sec[0])
    gridded_forecast_dict[prediction_io.XY_PROBABILITIES_KEY] = ([
        probability_matrix
    ])

    output_file_name = prediction_io.find_gridded_file(
        directory_name=output_dir_name,
        first_init_time_unix_sec=init_times_unix_sec[0],
        last_init_time_unix_sec=init_times_unix_sec[-1],
        raise_error_if_missing=False)

    print('Writing final grid to: "{0:s}"...'.format(output_file_name))

    prediction_io.write_gridded_predictions(
        gridded_forecast_dict=gridded_forecast_dict,
        pickle_file_name=output_file_name)
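
The combination step above reduces to an elementwise, NaN-ignoring maximum over initial times; a toy check:

import numpy

first_matrix = numpy.array([[0.1, numpy.nan], [0.5, 0.2]])
second_matrix = numpy.array([[0.3, 0.4], [numpy.nan, 0.1]])

combined_matrix = numpy.nanmax(
    numpy.stack((first_matrix, second_matrix), axis=-1), axis=-1)
print(combined_matrix)
# [[0.3 0.4]
#  [0.5 0.2]]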
Example #7
def _run(first_time_string, last_time_string, randomize_times, num_times,
         thermal_field_name, smoothing_radius_pixels, warm_front_percentile,
         cold_front_percentile, num_closing_iters, pressure_level_mb,
         top_narr_directory_name, narr_mask_file_name, output_dir_name):
    """Uses NFA (numerical frontal analysis) to predict front type at each px.

    This is effectively the main method.

    :param first_time_string: See documentation at top of file.
    :param last_time_string: Same.
    :param randomize_times: Same.
    :param num_times: Same.
    :param thermal_field_name: Same.
    :param smoothing_radius_pixels: Same.
    :param warm_front_percentile: Same.
    :param cold_front_percentile: Same.
    :param num_closing_iters: Same.
    :param pressure_level_mb: Same.
    :param top_narr_directory_name: Same.
    :param narr_mask_file_name: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if
        `thermal_field_name not in VALID_THERMAL_FIELD_NAMES`.
    """

    if thermal_field_name not in VALID_THERMAL_FIELD_NAMES:
        error_string = (
            '\n{0:s}\nValid thermal fields (listed above) do not include '
            '"{1:s}".').format(str(VALID_THERMAL_FIELD_NAMES),
                               thermal_field_name)

        raise ValueError(error_string)

    cutoff_radius_pixels = 4 * smoothing_radius_pixels

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC,
        include_endpoint=True)

    if randomize_times:
        error_checking.assert_is_leq(num_times, len(valid_times_unix_sec))
        numpy.random.shuffle(valid_times_unix_sec)
        valid_times_unix_sec = valid_times_unix_sec[:num_times]

    if narr_mask_file_name == '':
        num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
        narr_mask_matrix = numpy.full(
            (num_grid_rows, num_grid_columns), 1, dtype=int)
    else:
        print('Reading mask from: "{0:s}"...\n'.format(narr_mask_file_name))
        narr_mask_matrix = ml_utils.read_narr_mask(narr_mask_file_name)

    x_spacing_metres, y_spacing_metres = nwp_model_utils.get_xy_grid_spacing(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    num_times = len(valid_times_unix_sec)
    for i in range(num_times):
        this_thermal_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_narr_directory_name,
            field_name=thermal_field_name,
            pressure_level_mb=pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print('Reading data from: "{0:s}"...'.format(this_thermal_file_name))
        this_thermal_matrix_kelvins = processed_narr_io.read_fields_from_file(
            this_thermal_file_name)[0][0, ...]

        this_thermal_matrix_kelvins = general_utils.fill_nans(
            this_thermal_matrix_kelvins)
        this_thermal_matrix_kelvins = nfa.gaussian_smooth_2d_field(
            field_matrix=this_thermal_matrix_kelvins,
            standard_deviation_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels)

        this_u_wind_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_narr_directory_name,
            field_name=processed_narr_io.U_WIND_GRID_RELATIVE_NAME,
            pressure_level_mb=pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print('Reading data from: "{0:s}"...'.format(this_u_wind_file_name))
        this_u_wind_matrix_m_s01 = processed_narr_io.read_fields_from_file(
            this_u_wind_file_name)[0][0, ...]

        this_u_wind_matrix_m_s01 = general_utils.fill_nans(
            this_u_wind_matrix_m_s01)
        this_u_wind_matrix_m_s01 = nfa.gaussian_smooth_2d_field(
            field_matrix=this_u_wind_matrix_m_s01,
            standard_deviation_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels)

        this_v_wind_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_narr_directory_name,
            field_name=processed_narr_io.V_WIND_GRID_RELATIVE_NAME,
            pressure_level_mb=pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print('Reading data from: "{0:s}"...'.format(this_v_wind_file_name))
        this_v_wind_matrix_m_s01 = processed_narr_io.read_fields_from_file(
            this_v_wind_file_name)[0][0, ...]

        this_v_wind_matrix_m_s01 = general_utils.fill_nans(
            this_v_wind_matrix_m_s01)
        this_v_wind_matrix_m_s01 = nfa.gaussian_smooth_2d_field(
            field_matrix=this_v_wind_matrix_m_s01,
            standard_deviation_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels)

        this_tfp_matrix_kelvins_m02 = nfa.get_thermal_front_param(
            thermal_field_matrix_kelvins=this_thermal_matrix_kelvins,
            x_spacing_metres=x_spacing_metres,
            y_spacing_metres=y_spacing_metres)
        this_tfp_matrix_kelvins_m02[narr_mask_matrix == 0] = 0.

        this_proj_velocity_matrix_m_s01 = nfa.project_wind_to_thermal_gradient(
            u_matrix_grid_relative_m_s01=this_u_wind_matrix_m_s01,
            v_matrix_grid_relative_m_s01=this_v_wind_matrix_m_s01,
            thermal_field_matrix_kelvins=this_thermal_matrix_kelvins,
            x_spacing_metres=x_spacing_metres,
            y_spacing_metres=y_spacing_metres)

        this_locating_var_matrix_m01_s01 = nfa.get_locating_variable(
            tfp_matrix_kelvins_m02=this_tfp_matrix_kelvins_m02,
            projected_velocity_matrix_m_s01=this_proj_velocity_matrix_m_s01)

        this_predicted_label_matrix = nfa.get_front_types(
            locating_var_matrix_m01_s01=this_locating_var_matrix_m01_s01,
            warm_front_percentile=warm_front_percentile,
            cold_front_percentile=cold_front_percentile)

        this_predicted_label_matrix = front_utils.close_frontal_image(
            ternary_image_matrix=this_predicted_label_matrix,
            num_iterations=num_closing_iters)

        this_prediction_file_name = nfa.find_prediction_file(
            directory_name=output_dir_name,
            first_valid_time_unix_sec=valid_times_unix_sec[i],
            last_valid_time_unix_sec=valid_times_unix_sec[i],
            ensembled=False,
            raise_error_if_missing=False)

        print('Writing gridded predictions to file: "{0:s}"...\n'.format(
            this_prediction_file_name))

        nfa.write_gridded_predictions(
            pickle_file_name=this_prediction_file_name,
            predicted_label_matrix=numpy.expand_dims(
                this_predicted_label_matrix, axis=0),
            valid_times_unix_sec=valid_times_unix_sec[[i]],
            narr_mask_matrix=narr_mask_matrix,
            pressure_level_mb=pressure_level_mb,
            smoothing_radius_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels,
            warm_front_percentile=warm_front_percentile,
            cold_front_percentile=cold_front_percentile,
            num_closing_iters=num_closing_iters)
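
For context, the thermal front parameter (TFP) returned by `nfa.get_thermal_front_param` is conventionally the negative gradient of the thermal-gradient magnitude, projected onto the unit thermal gradient (Renard and Clarke 1965). A minimal numpy sketch under that assumption (not necessarily the library's exact implementation):

import numpy

def _thermal_front_param_sketch(thermal_matrix_kelvins, x_spacing_metres,
                                y_spacing_metres):
    # TFP = -grad(|grad T|) . (grad T / |grad T|), in K m^-2.  May be NaN
    # where the thermal gradient vanishes.
    dt_dy, dt_dx = numpy.gradient(
        thermal_matrix_kelvins, y_spacing_metres, x_spacing_metres)
    gradient_magnitude = numpy.sqrt(dt_dx ** 2 + dt_dy ** 2)
    dmag_dy, dmag_dx = numpy.gradient(
        gradient_magnitude, y_spacing_metres, x_spacing_metres)
    return -(dmag_dx * dt_dx + dmag_dy * dt_dy) / gradient_magnitude

# A uniform gradient has constant |grad T|, so TFP is zero (no front).
thermal_matrix_kelvins = numpy.array(
    [[280., 281., 282.], [280., 281., 282.], [280., 281., 282.]])
print(_thermal_front_param_sketch(thermal_matrix_kelvins, 1000., 1000.))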
Example #8
def _run_for_gridrad(spc_date_string, top_radar_dir_name, top_output_dir_name,
                     option_dict):
    """Runs echo classification for GridRad data.

    :param spc_date_string: See documentation at top of file.
    :param top_radar_dir_name: Same.
    :param top_output_dir_name: Same.
    :param option_dict: See doc for
        `echo_classification.find_convective_pixels`.
    """

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=time_conversion.get_start_of_spc_date(
            spc_date_string),
        end_time_unix_sec=time_conversion.get_end_of_spc_date(spc_date_string),
        time_interval_sec=TIME_INTERVAL_SEC,
        include_endpoint=True)

    num_times = len(valid_times_unix_sec)
    radar_file_names = [''] * num_times
    indices_to_keep = []

    for i in range(num_times):
        radar_file_names[i] = gridrad_io.find_file(
            top_directory_name=top_radar_dir_name,
            unix_time_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        if os.path.isfile(radar_file_names[i]):
            indices_to_keep.append(i)

    indices_to_keep = numpy.array(indices_to_keep, dtype=int)
    valid_times_unix_sec = valid_times_unix_sec[indices_to_keep]
    radar_file_names = [radar_file_names[k] for k in indices_to_keep]
    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        print('Reading data from: "{0:s}"...\n'.format(radar_file_names[i]))
        radar_metadata_dict = gridrad_io.read_metadata_from_full_grid_file(
            netcdf_file_name=radar_file_names[i])

        (reflectivity_matrix_dbz, all_heights_m_asl, grid_point_latitudes_deg,
         grid_point_longitudes_deg
         ) = gridrad_io.read_field_from_full_grid_file(
             netcdf_file_name=radar_file_names[i],
             field_name=radar_utils.REFL_NAME,
             metadata_dict=radar_metadata_dict)

        reflectivity_matrix_dbz = numpy.rollaxis(
            reflectivity_matrix_dbz, axis=0, start=3)

        height_indices = numpy.array(
            [all_heights_m_asl.tolist().index(h) for h in RADAR_HEIGHTS_M_ASL],
            dtype=int)
        reflectivity_matrix_dbz = reflectivity_matrix_dbz[..., height_indices]

        grid_metadata_dict = {
            echo_classifn.MIN_LATITUDE_KEY:
            numpy.min(grid_point_latitudes_deg),
            echo_classifn.LATITUDE_SPACING_KEY:
            grid_point_latitudes_deg[1] - grid_point_latitudes_deg[0],
            echo_classifn.MIN_LONGITUDE_KEY:
            numpy.min(grid_point_longitudes_deg),
            echo_classifn.LONGITUDE_SPACING_KEY:
            grid_point_longitudes_deg[1] - grid_point_longitudes_deg[0],
            echo_classifn.HEIGHTS_KEY:
            RADAR_HEIGHTS_M_ASL
        }

        convective_flag_matrix = echo_classifn.find_convective_pixels(
            reflectivity_matrix_dbz=reflectivity_matrix_dbz,
            grid_metadata_dict=grid_metadata_dict,
            valid_time_unix_sec=valid_times_unix_sec[i],
            option_dict=option_dict)

        print('Number of convective pixels = {0:d}\n'.format(
            numpy.sum(convective_flag_matrix)))

        this_output_file_name = echo_classifn.find_classification_file(
            top_directory_name=top_output_dir_name,
            valid_time_unix_sec=valid_times_unix_sec[i],
            desire_zipped=False,
            allow_zipped_or_unzipped=False,
            raise_error_if_missing=False)

        print('Writing echo classifications to: "{0:s}"...'.format(
            this_output_file_name))

        echo_classifn.write_classifications(
            convective_flag_matrix=convective_flag_matrix,
            grid_metadata_dict=grid_metadata_dict,
            valid_time_unix_sec=valid_times_unix_sec[i],
            option_dict=option_dict,
            netcdf_file_name=this_output_file_name)

        print(SEPARATOR_STRING)
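
The height subsetting in this example (mapping each desired height to its index in the file's height list) in miniature, with toy values:

import numpy

all_heights_m_asl = numpy.array([500, 1000, 1500, 2000, 2500])
RADAR_HEIGHTS_M_ASL = numpy.array([1000, 2000])  # toy stand-in

height_indices = numpy.array(
    [all_heights_m_asl.tolist().index(h) for h in RADAR_HEIGHTS_M_ASL],
    dtype=int)
print(height_indices)  # [1 3]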
Example #9
def _run(model_file_name, first_time_string, last_time_string, randomize_times,
         num_target_times, use_isotonic_regression, top_narr_directory_name,
         top_frontal_grid_dir_name, output_dir_name):
    """Applies traditional CNN to full grids.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param randomize_times: Same.
    :param num_target_times: Same.
    :param use_isotonic_regression: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param output_dir_name: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

    if randomize_times:
        error_checking.assert_is_leq(
            num_target_times, len(target_times_unix_sec))
        numpy.random.shuffle(target_times_unix_sec)
        target_times_unix_sec = target_times_unix_sec[:num_target_times]

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    if use_isotonic_regression:
        isotonic_file_name = isotonic_regression.find_model_file(
            base_model_file_name=model_file_name, raise_error_if_missing=True)

        print('Reading isotonic-regression models from: "{0:s}"...'.format(
            isotonic_file_name))
        isotonic_model_object_by_class = (
            isotonic_regression.read_model_for_each_class(isotonic_file_name)
        )
    else:
        isotonic_model_object_by_class = None

    if model_metadata_dict[traditional_cnn.NUM_LEAD_TIME_STEPS_KEY] is None:
        num_dimensions = 3
    else:
        num_dimensions = 4

    num_classes = len(model_metadata_dict[traditional_cnn.CLASS_FRACTIONS_KEY])
    num_target_times = len(target_times_unix_sec)
    print(SEPARATOR_STRING)

    for i in range(num_target_times):
        if num_dimensions == 3:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_3d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])
        else:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_4d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                predictor_time_step_offsets=model_metadata_dict[
                    traditional_cnn.PREDICTOR_TIME_STEP_OFFSETS_KEY],
                num_lead_time_steps=model_metadata_dict[
                    traditional_cnn.NUM_LEAD_TIME_STEPS_KEY],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])

        this_target_matrix[this_target_matrix == -1] = 0
        print(MINOR_SEPARATOR_STRING)

        this_prediction_file_name = ml_utils.find_gridded_prediction_file(
            directory_name=output_dir_name,
            first_target_time_unix_sec=target_times_unix_sec[i],
            last_target_time_unix_sec=target_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing gridded predictions to file: "{0:s}"...'.format(
            this_prediction_file_name))

        ml_utils.write_gridded_predictions(
            pickle_file_name=this_prediction_file_name,
            class_probability_matrix=this_class_probability_matrix,
            target_times_unix_sec=target_times_unix_sec[[i]],
            model_file_name=model_file_name,
            used_isotonic_regression=use_isotonic_regression,
            target_matrix=this_target_matrix)

        if i != num_target_times - 1:
            print(SEPARATOR_STRING)
Example #10
def _find_domain_for_date(top_gridrad_dir_name, spc_date_string):
    """Finds GridRad domain for the given SPC date.

    If no GridRad files are found for the given SPC date, this method returns
    None for all output variables.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param spc_date_string: SPC date or convective day (format "yyyymmdd").
    :return: domain_limits_deg: length-4 numpy array with
        [min latitude, max latitude, min longitude, max longitude].
        Units are deg N for latitude, deg W for longitude.
    """

    first_time_unix_sec = time_conversion.get_start_of_spc_date(spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(spc_date_string)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True
    )

    num_times = len(valid_times_unix_sec)
    min_latitudes_deg = numpy.full(num_times, numpy.nan)
    max_latitudes_deg = numpy.full(num_times, numpy.nan)
    min_longitudes_deg = numpy.full(num_times, numpy.nan)
    max_longitudes_deg = numpy.full(num_times, numpy.nan)

    for i in range(num_times):
        this_file_name = gridrad_io.find_file(
            unix_time_sec=valid_times_unix_sec[i],
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False
        )

        if not os.path.isfile(this_file_name):
            continue

        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_metadata_dict = gridrad_io.read_metadata_from_full_grid_file(
            this_file_name
        )

        max_latitudes_deg[i] = (
            this_metadata_dict[radar_utils.NW_GRID_POINT_LAT_COLUMN]
        )
        min_longitudes_deg[i] = (
            this_metadata_dict[radar_utils.NW_GRID_POINT_LNG_COLUMN]
        )

        max_longitudes_deg[i] = min_longitudes_deg[i] + (
            (this_metadata_dict[radar_utils.NUM_LNG_COLUMN] - 1) *
            this_metadata_dict[radar_utils.LNG_SPACING_COLUMN]
        )

        min_latitudes_deg[i] = max_latitudes_deg[i] - (
            (this_metadata_dict[radar_utils.NUM_LAT_COLUMN] - 1) *
            this_metadata_dict[radar_utils.LAT_SPACING_COLUMN]
        )

    good_indices = numpy.where(numpy.invert(numpy.isnan(min_latitudes_deg)))[0]
    if len(good_indices) == 0:
        return None

    coord_matrix = numpy.vstack((
        min_latitudes_deg[good_indices], max_latitudes_deg[good_indices],
        min_longitudes_deg[good_indices], max_longitudes_deg[good_indices]
    ))
    coord_matrix, num_instances_by_row = numpy.unique(
        numpy.transpose(coord_matrix), axis=0, return_counts=True
    )

    print(coord_matrix)
    print(num_instances_by_row)

    domain_limits_deg = coord_matrix[numpy.argmax(num_instances_by_row), :]
    domain_limits_deg[2:] = -1 * lng_conversion.convert_lng_negative_in_west(
        longitudes_deg=domain_limits_deg[2:], allow_nan=False
    )

    return domain_limits_deg
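
Domain selection picks the most common coordinate row across all times; a toy check of the `numpy.unique(..., axis=0, return_counts=True)` + `argmax` idiom:

import numpy

coord_matrix = numpy.array([
    [25., 50., -130., -60.],
    [25., 50., -130., -60.],
    [20., 55., -135., -60.]
])

unique_rows, num_instances_by_row = numpy.unique(
    coord_matrix, axis=0, return_counts=True)
print(unique_rows[numpy.argmax(num_instances_by_row), :])
# [  25.   50. -130.  -60.]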
Example #11
def _run(top_input_dir_name, first_time_string, last_time_string,
         input_field_name, pressure_level_mb, top_output_dir_name):
    """Converts NARR data to a more convenient file format.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param input_field_name: Same.
    :param pressure_level_mb: Same.
    :param top_output_dir_name: Same.
    """

    if pressure_level_mb <= 0:
        pressure_level_mb = None

    if pressure_level_mb is None:
        output_pressure_level_mb = DUMMY_PRESSURE_LEVEL_MB + 0
    else:
        output_pressure_level_mb = pressure_level_mb + 0

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS)

    if input_field_name == processed_narr_io.U_WIND_EARTH_RELATIVE_NAME:
        input_field_name_other = (processed_narr_io.V_WIND_EARTH_RELATIVE_NAME)
    elif input_field_name == processed_narr_io.V_WIND_EARTH_RELATIVE_NAME:
        input_field_name_other = (processed_narr_io.U_WIND_EARTH_RELATIVE_NAME)
    else:
        input_field_name_other = None

    input_field_name_grib1 = _std_to_grib1_field_name(
        field_name=input_field_name, pressure_level_mb=pressure_level_mb)

    if input_field_name in WIND_FIELD_NAMES:
        input_field_name_other_grib1 = _std_to_grib1_field_name(
            field_name=input_field_name_other,
            pressure_level_mb=pressure_level_mb)

        output_field_name = processed_narr_io.field_name_to_grid_relative(
            input_field_name)

        output_field_name_other = (
            processed_narr_io.field_name_to_grid_relative(
                input_field_name_other))

        (narr_latitude_matrix_deg, narr_longitude_matrix_deg
         ) = nwp_model_utils.get_latlng_grid_point_matrices(
             model_name=nwp_model_utils.NARR_MODEL_NAME)

        (narr_rotation_cosine_matrix,
         narr_rotation_sine_matrix) = nwp_model_utils.get_wind_rotation_angles(
             latitudes_deg=narr_latitude_matrix_deg,
             longitudes_deg=narr_longitude_matrix_deg,
             model_name=nwp_model_utils.NARR_MODEL_NAME)
    else:
        input_field_name_other_grib1 = None
        output_field_name = input_field_name + ''
        output_field_name_other = None

    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        if input_field_name in WIND_FIELD_NAMES:
            this_field_matrix_other = None

        if valid_times_unix_sec[i] > LAST_GRIB_TIME_UNIX_SEC:
            this_month_string = time_conversion.unix_sec_to_string(
                valid_times_unix_sec[i], MONTH_TIME_FORMAT)

            this_netcdf_file_name = narr_netcdf_io.find_file(
                top_directory_name=top_input_dir_name,
                field_name=input_field_name,
                month_string=this_month_string,
                is_surface=pressure_level_mb is None)

            print('Reading data from: "{0:s}"...'.format(
                this_netcdf_file_name))
            this_field_matrix = narr_netcdf_io.read_file(
                netcdf_file_name=this_netcdf_file_name,
                field_name=input_field_name,
                valid_time_unix_sec=valid_times_unix_sec[i],
                pressure_level_mb=pressure_level_mb)

            if input_field_name in WIND_FIELD_NAMES:
                this_netcdf_file_name_other = narr_netcdf_io.find_file(
                    top_directory_name=top_input_dir_name,
                    field_name=input_field_name_other,
                    month_string=this_month_string,
                    is_surface=pressure_level_mb is None)

                print('Reading data from: "{0:s}"...'.format(
                    this_netcdf_file_name_other))

                this_field_matrix_other = narr_netcdf_io.read_file(
                    netcdf_file_name=this_netcdf_file_name_other,
                    field_name=input_field_name_other,
                    valid_time_unix_sec=valid_times_unix_sec[i],
                    pressure_level_mb=pressure_level_mb)
        else:
            this_grib_file_name = nwp_model_io.find_grib_file(
                top_directory_name=top_input_dir_name,
                model_name=nwp_model_utils.NARR_MODEL_NAME,
                init_time_unix_sec=valid_times_unix_sec[i],
                lead_time_hours=0)

            print('Reading data from: "{0:s}"...'.format(this_grib_file_name))
            this_field_matrix = nwp_model_io.read_field_from_grib_file(
                grib_file_name=this_grib_file_name,
                field_name_grib1=input_field_name_grib1,
                model_name=nwp_model_utils.NARR_MODEL_NAME,
                wgrib_exe_name=WGRIB_EXE_NAME,
                wgrib2_exe_name=WGRIB2_EXE_NAME)

            if input_field_name in WIND_FIELD_NAMES:
                this_field_matrix_other = (
                    nwp_model_io.read_field_from_grib_file(
                        grib_file_name=this_grib_file_name,
                        field_name_grib1=input_field_name_other_grib1,
                        model_name=nwp_model_utils.NARR_MODEL_NAME,
                        wgrib_exe_name=WGRIB_EXE_NAME,
                        wgrib2_exe_name=WGRIB2_EXE_NAME))

        if input_field_name in WIND_FIELD_NAMES:
            print('Rotating Earth-relative winds to grid-relative...')

            if input_field_name == processed_narr_io.U_WIND_EARTH_RELATIVE_NAME:
                this_field_matrix, this_field_matrix_other = (
                    nwp_model_utils.rotate_winds_to_grid_relative(
                        u_winds_earth_relative_m_s01=this_field_matrix,
                        v_winds_earth_relative_m_s01=this_field_matrix_other,
                        rotation_angle_cosines=narr_rotation_cosine_matrix,
                        rotation_angle_sines=narr_rotation_sine_matrix))
            else:
                this_field_matrix_other, this_field_matrix = (
                    nwp_model_utils.rotate_winds_to_grid_relative(
                        u_winds_earth_relative_m_s01=this_field_matrix_other,
                        v_winds_earth_relative_m_s01=this_field_matrix,
                        rotation_angle_cosines=narr_rotation_cosine_matrix,
                        rotation_angle_sines=narr_rotation_sine_matrix))

        this_output_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_output_dir_name,
            field_name=output_field_name,
            pressure_level_mb=output_pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing processed data to: "{0:s}"...'.format(
            this_output_file_name))

        processed_narr_io.write_fields_to_file(
            pickle_file_name=this_output_file_name,
            field_matrix=numpy.expand_dims(this_field_matrix, axis=0),
            field_name=output_field_name,
            pressure_level_pascals=output_pressure_level_mb * MB_TO_PASCALS,
            valid_times_unix_sec=valid_times_unix_sec[[i]])

        if input_field_name not in WIND_FIELD_NAMES:
            print('\n')
            continue

        this_output_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_output_dir_name,
            field_name=output_field_name_other,
            pressure_level_mb=output_pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing processed data to: "{0:s}"...\n'.format(
            this_output_file_name))

        processed_narr_io.write_fields_to_file(
            pickle_file_name=this_output_file_name,
            field_matrix=numpy.expand_dims(this_field_matrix_other, axis=0),
            field_name=output_field_name_other,
            pressure_level_pascals=output_pressure_level_mb * MB_TO_PASCALS,
            valid_times_unix_sec=valid_times_unix_sec[[i]])
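
Grid-relative winds differ from Earth-relative winds by a pointwise rotation. A minimal sketch under the standard 2-D rotation convention (the library's sign convention may differ):

import numpy

def _rotate_winds_sketch(u_earth_m_s01, v_earth_m_s01, cosine_matrix,
                         sine_matrix):
    # Pointwise rotation by the local angle between Earth-relative north and
    # grid-relative north; the sign convention is assumed, not taken from the
    # library.
    u_grid_m_s01 = cosine_matrix * u_earth_m_s01 + sine_matrix * v_earth_m_s01
    v_grid_m_s01 = -sine_matrix * u_earth_m_s01 + cosine_matrix * v_earth_m_s01
    return u_grid_m_s01, v_grid_m_s01

u_grid, v_grid = _rotate_winds_sketch(
    numpy.array([10.]), numpy.array([0.]),
    cosine_matrix=numpy.array([1.]), sine_matrix=numpy.array([0.]))
print(u_grid, v_grid)  # [10.] [0.] (zero rotation angle leaves winds unchanged)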
Example #12
def _link_one_warning(warning_table,
                      storm_object_table,
                      max_distance_metres,
                      min_lifetime_fraction,
                      test_mode=False):
    """Links one warning to nearest storm.

    :param warning_table: pandas DataFrame with one row and the following
        columns.
    warning_table.start_time_unix_sec: Start time.
    warning_table.end_time_unix_sec: End time.
    warning_table.polygon_object_latlng: Polygon (instance of
        `shapely.geometry.Polygon`) with lat-long coordinates of warning
        boundary.
    warning_table.polygon_object_xy: Polygon (instance of
        `shapely.geometry.Polygon`) with x-y coordinates of warning boundary.

    :param storm_object_table: pandas DataFrame returned by
        `storm_tracking_io.read_file`.
    :param max_distance_metres: See documentation at top of file.
    :param min_lifetime_fraction: Same.
    :param test_mode: Never mind.  Just leave this alone.
    :return: secondary_id_strings: 1-D list of secondary IDs for storms to which
        warning is linked.  If warning is not linked to a storm, this is empty.
    """

    warning_start_time_unix_sec = (
        warning_table[WARNING_START_TIME_KEY].values[0])
    warning_end_time_unix_sec = warning_table[WARNING_END_TIME_KEY].values[0]
    warning_polygon_object_xy = warning_table[WARNING_XY_POLYGON_KEY].values[0]

    orig_num_storm_objects = len(storm_object_table.index)

    storm_object_table = linkage._filter_storms_by_time(
        storm_object_table=storm_object_table,
        max_start_time_unix_sec=warning_end_time_unix_sec + 720,
        min_end_time_unix_sec=warning_start_time_unix_sec - 720)

    num_storm_objects = len(storm_object_table.index)
    print('Filtering by time removed {0:d} of {1:d} storm objects.'.format(
        orig_num_storm_objects - num_storm_objects, orig_num_storm_objects))

    orig_num_storm_objects = num_storm_objects + 0

    storm_object_table = _remove_far_away_storms(
        warning_polygon_object_latlng=warning_table[WARNING_LATLNG_POLYGON_KEY]
        .values[0],
        storm_object_table=storm_object_table)

    num_storm_objects = len(storm_object_table.index)
    print('Filtering by distance removed {0:d} of {1:d} storm objects.'.format(
        orig_num_storm_objects - num_storm_objects, orig_num_storm_objects))

    warning_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=warning_start_time_unix_sec,
        end_time_unix_sec=warning_end_time_unix_sec,
        time_interval_sec=1 if test_mode else 60,
        include_endpoint=True)

    unique_sec_id_strings = numpy.unique(
        storm_object_table[tracking_utils.SECONDARY_ID_COLUMN].values)

    num_sec_id_strings = len(unique_sec_id_strings)
    num_warning_times = len(warning_times_unix_sec)
    distance_matrix_metres = numpy.full(
        (num_sec_id_strings, num_warning_times), numpy.nan)

    for j in range(num_warning_times):
        this_interp_vertex_table = linkage._interp_storms_in_time(
            storm_object_table=storm_object_table,
            target_time_unix_sec=warning_times_unix_sec[j],
            max_time_before_start_sec=0 if test_mode else 180,
            max_time_after_end_sec=0 if test_mode else 180)

        for i in range(num_sec_id_strings):
            these_indices = numpy.where(
                this_interp_vertex_table[tracking_utils.SECONDARY_ID_COLUMN].
                values == unique_sec_id_strings[i])[0]

            if len(these_indices) == 0:
                continue

            these_x_metres = this_interp_vertex_table[
                linkage.STORM_VERTEX_X_COLUMN].values[these_indices]

            these_y_metres = this_interp_vertex_table[
                linkage.STORM_VERTEX_Y_COLUMN].values[these_indices]

            distance_matrix_metres[i, j] = _find_one_centroid_distance(
                storm_x_vertices_metres=these_x_metres,
                storm_y_vertices_metres=these_y_metres,
                warning_polygon_object_xy=warning_polygon_object_xy)

    lifetime_fractions = (
        1. - numpy.mean(numpy.isnan(distance_matrix_metres), axis=1))
    bad_indices = numpy.where(lifetime_fractions < min_lifetime_fraction)[0]
    distance_matrix_metres[bad_indices, ...] = LARGE_NUMBER

    mean_distances_metres = numpy.nanmean(distance_matrix_metres, axis=1)
    good_indices = numpy.where(mean_distances_metres <= max_distance_metres)[0]

    print((
        'Linked warning to {0:d} storms.  All distances (metres) printed below:'
        '\n{1:s}').format(len(good_indices), str(mean_distances_metres)))

    return [unique_sec_id_strings[k] for k in good_indices]
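
The lifetime fraction above is the fraction of warning times at which the storm could be interpolated (non-NaN distance); a toy check:

import numpy

distance_matrix_metres = numpy.array([
    [1000., numpy.nan, 2000., numpy.nan],
    [500., 600., 700., 800.]
])

lifetime_fractions = 1. - numpy.mean(
    numpy.isnan(distance_matrix_metres), axis=1)
print(lifetime_fractions)  # [0.5 1. ]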
Example #13
def downsized_examples_to_eval_pairs(
        model_object, first_target_time_unix_sec, last_target_time_unix_sec,
        num_target_times_to_sample, num_examples_per_time,
        top_narr_directory_name, top_frontal_grid_dir_name,
        narr_predictor_names, pressure_level_mb, dilation_distance_metres,
        num_rows_in_half_grid, num_columns_in_half_grid, num_classes,
        predictor_time_step_offsets=None, num_lead_time_steps=None,
        isotonic_model_object_by_class=None, narr_mask_matrix=None):
    """Creates evaluation pairs from downsized 3-D or 4-D examples.

    M = number of pixel rows in full NARR grid
    N = number of pixel columns in full NARR grid

    m = number of pixel rows in each downsized grid
      = 2 * num_rows_in_half_grid + 1
    n = number of pixel columns in each downsized grid
      = 2 * num_columns_in_half_grid + 1

    P = number of evaluation pairs created by this method
    K = number of classes

    :param model_object: Instance of `keras.models.Model`.  This will be applied
        to each downsized example, creating the prediction for said example.
    :param first_target_time_unix_sec: First target time.  Downsized examples
        will be randomly chosen from the period
        `first_target_time_unix_sec`...`last_target_time_unix_sec`.
    :param last_target_time_unix_sec: See above.
    :param num_target_times_to_sample: Number of target times to sample (from
        the period `first_target_time_unix_sec`...`last_target_time_unix_sec`).
    :param num_examples_per_time: Number of downsized examples per target time.
        Downsized examples will be randomly drawn from each target time.
    :param top_narr_directory_name: Name of top-level directory with NARR data
        (one file for each variable, pressure level, and time step).
    :param top_frontal_grid_dir_name: Name of top-level directory with frontal
        grids (one file per time step).
    :param narr_predictor_names: 1-D list of NARR fields to use as predictors.
    :param pressure_level_mb: Pressure level (millibars).
    :param dilation_distance_metres: Dilation distance for both warm and cold
        fronts.
    :param num_rows_in_half_grid: See general discussion above.
    :param num_columns_in_half_grid: See general discussion above.
    :param num_classes: Number of classes.
    :param predictor_time_step_offsets: [needed only if examples are 4-D]
        length-T numpy array of offsets between predictor times and
        (target time - lead time).
    :param num_lead_time_steps: [needed only if examples are 4-D]
        Number of time steps between latest predictor time (last image in the
        sequence) and target time.
    :param isotonic_model_object_by_class: length-K list with trained instances
        of `sklearn.isotonic.IsotonicRegression`.  If None, will omit isotonic
        regression.
    :param narr_mask_matrix: M-by-N numpy array of integers (0 or 1).  If
        narr_mask_matrix[i, j] = 0, cell [i, j] in the full grid will never be
        used to create an evaluation pair -- i.e., will never be used as the
        center of a downsized grid.  If `narr_mask_matrix is None`, any cell in
        the full grid can be used to create an evaluation pair.
    :return: class_probability_matrix: See documentation for
        `check_evaluation_pairs`.
    :return: observed_labels: See doc for `check_evaluation_pairs`.
    """

    error_checking.assert_is_integer(num_target_times_to_sample)
    error_checking.assert_is_greater(num_target_times_to_sample, 0)
    error_checking.assert_is_integer(num_examples_per_time)
    error_checking.assert_is_greater(num_examples_per_time, 0)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    if predictor_time_step_offsets is None:
        num_dimensions_per_example = 3
    else:
        num_dimensions_per_example = 4

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_target_time_unix_sec,
        end_time_unix_sec=last_target_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS, include_endpoint=True)

    numpy.random.shuffle(target_times_unix_sec)
    target_times_unix_sec = target_times_unix_sec[:num_target_times_to_sample]
    target_time_strings = [
        time_conversion.unix_sec_to_string(t, TIME_FORMAT_FOR_LOG_MESSAGES)
        for t in target_times_unix_sec
    ]

    class_probability_matrix = numpy.full(
        (num_target_times_to_sample, num_examples_per_time, num_classes),
        numpy.nan)
    observed_labels = numpy.full(
        (num_target_times_to_sample, num_examples_per_time), -1, dtype=int)

    for i in range(num_target_times_to_sample):
        print('Drawing evaluation pairs from {0:s}...'.format(
            target_time_strings[i]))

        (these_center_row_indices, these_center_column_indices
        ) = _get_random_sample_points(
            num_points=num_examples_per_time, for_downsized_examples=True,
            narr_mask_matrix=narr_mask_matrix)

        if num_dimensions_per_example == 3:
            (this_downsized_predictor_matrix, observed_labels[i, :], _, _
            ) = testing_io.create_downsized_3d_examples(
                center_row_indices=these_center_row_indices,
                center_column_indices=these_center_column_indices,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                target_time_unix_sec=target_times_unix_sec[i],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                num_classes=num_classes)

        else:
            (this_downsized_predictor_matrix, observed_labels[i, :], _, _
            ) = testing_io.create_downsized_4d_examples(
                center_row_indices=these_center_row_indices,
                center_column_indices=these_center_column_indices,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                target_time_unix_sec=target_times_unix_sec[i],
                predictor_time_step_offsets=predictor_time_step_offsets,
                num_lead_time_steps=num_lead_time_steps,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                num_classes=num_classes)

        class_probability_matrix[i, ...] = model_object.predict(
            this_downsized_predictor_matrix, batch_size=num_examples_per_time)

    new_dimensions = (
        num_target_times_to_sample * num_examples_per_time, num_classes
    )
    class_probability_matrix = numpy.reshape(
        class_probability_matrix, new_dimensions)
    observed_labels = numpy.reshape(observed_labels, observed_labels.size)

    if isotonic_model_object_by_class is not None:
        class_probability_matrix = (
            isotonic_regression.apply_model_for_each_class(
                orig_class_probability_matrix=class_probability_matrix,
                observed_labels=observed_labels,
                model_object_by_class=isotonic_model_object_by_class))

    return class_probability_matrix, observed_labels
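
The final reshape collapses the (target time, example) grid of pairs into one flat list; a toy shape check:

import numpy

num_times, num_examples_per_time, num_classes = 2, 3, 4
class_probability_matrix = numpy.random.random_sample(
    (num_times, num_examples_per_time, num_classes))

class_probability_matrix = numpy.reshape(
    class_probability_matrix,
    (num_times * num_examples_per_time, num_classes))
print(class_probability_matrix.shape)  # (6, 4)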
Example #14
def _find_io_files_for_renaming(top_input_dir_name, first_date_unix_sec,
                                last_date_unix_sec, top_output_dir_name):
    """Finds input and output files for renaming storms.

    N = number of dates

    :param top_input_dir_name: See documentation for `rename_storms`.
    :param first_date_unix_sec: Same.
    :param last_date_unix_sec: Same.
    :param top_output_dir_name: Same.
    :return: input_file_names_by_date: length-N list, where the [i]th item is a
        numpy array of paths to input files for the [i]th date.
    :return: output_file_names_by_date: Same as above, but for output files.
    :return: valid_times_by_date_unix_sec: Same as above, but for valid times.
        All 3 arrays for the [i]th date have the same length.
    """

    dates_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_date_unix_sec,
        end_time_unix_sec=last_date_unix_sec,
        time_interval_sec=DAYS_TO_SECONDS,
        include_endpoint=True)
    date_strings = [
        time_conversion.unix_sec_to_string(t, DATE_FORMAT)
        for t in dates_unix_sec
    ]

    num_dates = len(date_strings)
    input_file_names_by_date = [numpy.array([], dtype=object)] * num_dates
    output_file_names_by_date = [numpy.array([], dtype=object)] * num_dates
    valid_times_by_date_unix_sec = [numpy.array([], dtype=int)] * num_dates

    for i in range(num_dates):
        print('Finding input files for date {0:s}...'.format(date_strings[i]))

        (these_input_file_names,
         _) = tracking_io.find_processed_files_one_spc_date(
             spc_date_string=date_strings[i],
             data_source=tracking_utils.PROBSEVERE_SOURCE_ID,
             top_processed_dir_name=top_input_dir_name,
             tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
             raise_error_if_missing=True)

        these_input_file_names.sort()
        these_valid_times_unix_sec = numpy.array(
            [tracking_io.processed_file_name_to_time(f)
             for f in these_input_file_names],
            dtype=int)

        these_output_file_names = []
        for t in these_valid_times_unix_sec:
            these_output_file_names.append(
                tracking_io.find_processed_file(
                    unix_time_sec=t,
                    data_source=tracking_utils.PROBSEVERE_SOURCE_ID,
                    top_processed_dir_name=top_output_dir_name,
                    tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
                    raise_error_if_missing=False))

        input_file_names_by_date[i] = numpy.array(
            these_input_file_names, dtype=object)
        output_file_names_by_date[i] = numpy.array(
            these_output_file_names, dtype=object)
        valid_times_by_date_unix_sec[i] = these_valid_times_unix_sec

    print(SEPARATOR_STRING)
    return (input_file_names_by_date, output_file_names_by_date,
            valid_times_by_date_unix_sec)
Example #15
def _run(top_frontal_grid_dir_name, first_time_string, last_time_string,
         dilation_distance_metres, min_num_fronts, output_dir_name):
    """Creates mask, indicating where human forecasters usually draw fronts.

    This is effectively the main method.

    :param top_frontal_grid_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param dilation_distance_metres: Same.
    :param min_num_fronts: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)
    error_checking.assert_is_greater(min_num_fronts, 0)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS)

    num_times = len(valid_times_unix_sec)
    num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    num_cold_fronts_matrix = None
    num_warm_fronts_matrix = None

    for i in range(num_times):
        this_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_frontal_grid_dir_name,
            file_type=fronts_io.GRIDDED_FILE_TYPE,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)
        if not os.path.isfile(this_file_name):
            warning_string = ('POTENTIAL PROBLEM.  Cannot find file: "{0:s}"'
                              ).format(this_file_name)
            warnings.warn(warning_string)
            continue

        print 'Reading data from: "{0:s}"...'.format(this_file_name)
        this_frontal_grid_table = fronts_io.read_narr_grids_from_file(
            this_file_name)

        this_frontal_grid_matrix = ml_utils.front_table_to_images(
            frontal_grid_table=this_frontal_grid_table,
            num_rows_per_image=num_grid_rows,
            num_columns_per_image=num_grid_columns)

        this_frontal_grid_matrix = ml_utils.dilate_ternary_target_images(
            target_matrix=this_frontal_grid_matrix,
            dilation_distance_metres=dilation_distance_metres,
            verbose=False)
        this_frontal_grid_matrix = this_frontal_grid_matrix[0, ...]

        this_num_cold_fronts_matrix = (
            this_frontal_grid_matrix == front_utils.COLD_FRONT_INTEGER_ID
        ).astype(int)
        this_num_warm_fronts_matrix = (
            this_frontal_grid_matrix == front_utils.WARM_FRONT_INTEGER_ID
        ).astype(int)

        if num_cold_fronts_matrix is None:
            num_cold_fronts_matrix = this_num_cold_fronts_matrix + 0
            num_warm_fronts_matrix = this_num_warm_fronts_matrix + 0
        else:
            num_cold_fronts_matrix = (num_cold_fronts_matrix +
                                      this_num_cold_fronts_matrix)
            num_warm_fronts_matrix = (num_warm_fronts_matrix +
                                      this_num_warm_fronts_matrix)

    print SEPARATOR_STRING

    print 'Masking out grid cells with < {0:d} fronts...'.format(
        min_num_fronts)
    num_both_fronts_matrix = num_warm_fronts_matrix + num_cold_fronts_matrix
    mask_matrix = (num_both_fronts_matrix >= min_num_fronts).astype(int)

    pickle_file_name = '{0:s}/narr_mask.p'.format(output_dir_name)
    print 'Writing mask to: "{0:s}"...'.format(pickle_file_name)
    ml_utils.write_narr_mask(mask_matrix=mask_matrix,
                             pickle_file_name=pickle_file_name)

    warm_front_map_file_name = '{0:s}/num_warm_fronts.jpg'.format(
        output_dir_name)
    _plot_front_densities(num_fronts_matrix=num_warm_fronts_matrix,
                          colour_map_object=WARM_FRONT_COLOUR_MAP_OBJECT,
                          title_string='Number of warm fronts',
                          annotation_string='(a)',
                          output_file_name=warm_front_map_file_name,
                          mask_matrix=None,
                          add_colour_bar=True)

    cold_front_map_file_name = '{0:s}/num_cold_fronts.jpg'.format(
        output_dir_name)
    _plot_front_densities(num_fronts_matrix=num_cold_fronts_matrix,
                          colour_map_object=COLD_FRONT_COLOUR_MAP_OBJECT,
                          title_string='Number of cold fronts',
                          annotation_string='(b)',
                          output_file_name=cold_front_map_file_name,
                          mask_matrix=None,
                          add_colour_bar=True)

    both_fronts_title_string = 'Grid cells with at least {0:d} fronts'.format(
        min_num_fronts)
    both_fronts_map_file_name = '{0:s}/num_both_fronts.jpg'.format(
        output_dir_name)
    num_both_fronts_matrix[num_both_fronts_matrix > 1] = 1

    _plot_front_densities(num_fronts_matrix=num_both_fronts_matrix,
                          colour_map_object=BOTH_FRONTS_COLOUR_MAP_OBJECT,
                          title_string=both_fronts_title_string,
                          annotation_string='(c)',
                          output_file_name=both_fronts_map_file_name,
                          mask_matrix=mask_matrix,
                          add_colour_bar=False)
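The masking step above boils down to thresholding the summed warm- and cold-front counts.  A minimal, self-contained sketch with toy counts (pure numpy; the real matrices cover the full NARR grid):

import numpy

# Toy 2 x 2 front counts standing in for the NARR-sized matrices.
num_warm_fronts_matrix = numpy.array([[0, 2], [5, 1]])
num_cold_fronts_matrix = numpy.array([[1, 0], [4, 0]])
min_num_fronts = 3

num_both_fronts_matrix = num_warm_fronts_matrix + num_cold_fronts_matrix
mask_matrix = (num_both_fronts_matrix >= min_num_fronts).astype(int)
print(mask_matrix)  # [[0 0], [1 0]] -- only the 9-front cell survives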
Example #16
def find_raw_azimuthal_shear_file(
        desired_time_unix_sec, spc_date_unix_sec, field_name, data_source,
        top_directory_name,
        max_time_offset_sec=DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC,
        raise_error_if_missing=False):
    """Finds raw azimuthal-shear file on local machine.

    If you know the exact time step for azimuthal shear, use find_raw_file.
    However, azimuthal shear is "special" and its times are often offset from
    those of other radar fields.  This method accounts for that and finds
    az-shear files within some offset of the desired time.

    :param desired_time_unix_sec: Desired time for azimuthal shear.
    :param spc_date_unix_sec: SPC date.
    :param field_name: Field name in GewitterGefahr format (should match either
        `LOW_LEVEL_SHEAR_NAME` or `MID_LEVEL_SHEAR_NAME`).
    :param data_source: Data source (either "myrorss" or "mrms").
    :param top_directory_name: Name of top-level directory with raw MYRORSS or
        MRMS files.
    :param max_time_offset_sec: Maximum offset (in seconds) allowed between
        desired and actual valid time.
    :param raise_error_if_missing: Boolean flag.  If no az-shear file can be
        found within `max_time_offset_sec` of `desired_time_unix_sec`, this
        method raises an error if True and returns None if False.
    :return: raw_file_name: Path to raw az-shear file.  If file is missing and
        raise_error_if_missing = False, this is the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_integer(desired_time_unix_sec)
    error_checking.assert_is_integer(max_time_offset_sec)
    error_checking.assert_is_greater(max_time_offset_sec, 0)
    error_checking.assert_is_boolean(raise_error_if_missing)

    first_allowed_minute_unix_sec = int(rounder.floor_to_nearest(
        float(desired_time_unix_sec - max_time_offset_sec),
        MINUTES_TO_SECONDS))
    last_allowed_minute_unix_sec = int(rounder.floor_to_nearest(
        float(desired_time_unix_sec + max_time_offset_sec),
        MINUTES_TO_SECONDS))

    allowed_minutes_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_allowed_minute_unix_sec,
        end_time_unix_sec=last_allowed_minute_unix_sec,
        time_interval_sec=MINUTES_TO_SECONDS, include_endpoint=True).astype(int)

    spc_date_string = time_conversion.time_to_spc_date_string(spc_date_unix_sec)
    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name, data_source=data_source)

    raw_file_names = []
    for this_time_unix_sec in allowed_minutes_unix_sec:
        this_pathless_file_pattern = _get_pathless_raw_file_pattern(
            this_time_unix_sec)
        this_file_pattern = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
            top_directory_name, spc_date_string, relative_directory_name,
            this_pathless_file_pattern)
        raw_file_names += glob.glob(this_file_pattern)

    file_times_unix_sec = []
    for this_raw_file_name in raw_file_names:
        _, this_pathless_file_name = os.path.split(this_raw_file_name)
        this_time_string, ending = os.path.splitext(this_pathless_file_name)
        if 'gz' in ending:
            this_time_string, ending = os.path.splitext(this_time_string)
        file_times_unix_sec.append(time_conversion.string_to_unix_sec(
            this_time_string, TIME_FORMAT_SECONDS))

    if len(file_times_unix_sec):
        file_times_unix_sec = numpy.array(file_times_unix_sec)
        time_differences_sec = numpy.absolute(
            file_times_unix_sec - desired_time_unix_sec)
        nearest_index = numpy.argmin(time_differences_sec)
        min_time_diff_sec = time_differences_sec[nearest_index]
    else:
        min_time_diff_sec = numpy.inf

    if min_time_diff_sec > max_time_offset_sec:
        if raise_error_if_missing:
            desired_time_string = time_conversion.unix_sec_to_string(
                desired_time_unix_sec, TIME_FORMAT_FOR_LOG_MESSAGES)
            log_string = ('Could not find "{0:s}" file within {1:d} seconds of '
                          '{2:s}').format(field_name, max_time_offset_sec,
                                          desired_time_string)
            raise ValueError(log_string)

        return None

    return raw_file_names[nearest_index]
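The search window above comes from flooring both offset endpoints to the nearest minute.  A self-contained sketch of the same arithmetic (assuming MINUTES_TO_SECONDS = 60; the time and offset are hypothetical), without the rounder module:

import numpy

MINUTES_TO_SECONDS = 60
desired_time_unix_sec = 1515170973  # hypothetical time
max_time_offset_sec = 180

# Floor each endpoint to the nearest minute, then list every minute between.
first_allowed_minute_unix_sec = MINUTES_TO_SECONDS * (
    (desired_time_unix_sec - max_time_offset_sec) // MINUTES_TO_SECONDS)
last_allowed_minute_unix_sec = MINUTES_TO_SECONDS * (
    (desired_time_unix_sec + max_time_offset_sec) // MINUTES_TO_SECONDS)

allowed_minutes_unix_sec = numpy.arange(
    first_allowed_minute_unix_sec,
    last_allowed_minute_unix_sec + MINUTES_TO_SECONDS,
    MINUTES_TO_SECONDS, dtype=int)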
def _run(prediction_dir_name_by_model, model_weights, first_time_string,
         last_time_string, output_prediction_dir_name):
    """Ensembles predictions from two or more NFA models.

    This is effectively the main method.

    :param prediction_dir_name_by_model: See documentation at top of file.
    :param model_weights: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param output_prediction_dir_name: Same.
    """

    error_checking.assert_is_geq_numpy_array(model_weights, 0.)
    error_checking.assert_is_leq_numpy_array(model_weights, 1.)
    error_checking.assert_is_geq(numpy.sum(model_weights), 1. - TOLERANCE)
    error_checking.assert_is_leq(numpy.sum(model_weights), 1. + TOLERANCE)

    num_models = len(model_weights)
    error_checking.assert_is_geq(num_models, 2)

    these_expected_dim = numpy.array([num_models], dtype=int)
    error_checking.assert_is_numpy_array(
        numpy.array(prediction_dir_name_by_model),
        exact_dimensions=these_expected_dim)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    possible_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS,
        include_endpoint=True)

    narr_mask_matrix = None

    for this_time_unix_sec in possible_times_unix_sec:
        these_prediction_file_names = [''] * num_models

        for j in range(num_models):
            these_prediction_file_names[j] = nfa.find_prediction_file(
                directory_name=prediction_dir_name_by_model[j],
                first_valid_time_unix_sec=this_time_unix_sec,
                last_valid_time_unix_sec=this_time_unix_sec,
                ensembled=False,
                raise_error_if_missing=j > 0)

            if not os.path.isfile(these_prediction_file_names[j]):
                break

        if these_prediction_file_names[-1] == '':
            continue

        this_class_probability_matrix = None

        for j in range(num_models):
            print 'Reading data from: "{0:s}"...'.format(
                these_prediction_file_names[j])

            this_predicted_label_matrix, this_metadata_dict = (
                nfa.read_gridded_predictions(these_prediction_file_names[j]))

            if narr_mask_matrix is None:
                narr_mask_matrix = this_metadata_dict[nfa.NARR_MASK_KEY] + 0

            new_class_probability_matrix = to_categorical(
                y=this_predicted_label_matrix,
                num_classes=NUM_CLASSES).astype(float)

            new_class_probability_matrix = (model_weights[j] *
                                            new_class_probability_matrix)

            if this_class_probability_matrix is None:
                this_class_probability_matrix = (new_class_probability_matrix +
                                                 0.)
            else:
                this_class_probability_matrix = (
                    this_class_probability_matrix +
                    new_class_probability_matrix)

        this_output_file_name = nfa.find_prediction_file(
            directory_name=output_prediction_dir_name,
            first_valid_time_unix_sec=this_time_unix_sec,
            last_valid_time_unix_sec=this_time_unix_sec,
            ensembled=True,
            raise_error_if_missing=False)

        print 'Writing ensembled predictions to: "{0:s}"...\n'.format(
            this_output_file_name)

        nfa.write_ensembled_predictions(
            pickle_file_name=this_output_file_name,
            class_probability_matrix=this_class_probability_matrix,
            valid_times_unix_sec=numpy.array([this_time_unix_sec], dtype=int),
            narr_mask_matrix=narr_mask_matrix,
            prediction_dir_name_by_model=prediction_dir_name_by_model,
            model_weights=model_weights)
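The core of the ensembling above is a weighted sum of one-hot label grids.  A self-contained sketch using numpy.eye in place of Keras's to_categorical (NUM_CLASSES = 3 and the toy label grids are assumptions):

import numpy

NUM_CLASSES = 3
model_weights = numpy.array([0.7, 0.3])
predicted_label_matrices = [
    numpy.array([[0, 1], [2, 2]]),
    numpy.array([[0, 2], [2, 1]])
]

# numpy.eye(K)[labels] one-hot-encodes an integer label grid.
class_probability_matrix = sum(
    this_weight * numpy.eye(NUM_CLASSES)[this_label_matrix]
    for this_weight, this_label_matrix in
    zip(model_weights, predicted_label_matrices))
print(class_probability_matrix[0, 1])  # [0.  0.7  0.3]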
Example #18
def _run(input_prediction_dir_name, first_time_string, last_time_string,
         num_times, binarization_threshold, min_object_area_metres2,
         min_endpoint_length_metres, top_front_line_dir_name,
         output_file_name):
    """Converts gridded CNN predictions to objects.

    This is effectively the main method.

    :param input_prediction_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param binarization_threshold: Same.
    :param min_object_area_metres2: Same.
    :param min_endpoint_length_metres: Same.
    :param top_front_line_dir_name: Same.
    :param output_file_name: Same.
    """

    grid_spacing_metres = nwp_model_utils.get_xy_grid_spacing(
        model_name=nwp_model_utils.NARR_MODEL_NAME)[0]
    num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    possible_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS,
        include_endpoint=True)

    numpy.random.shuffle(possible_times_unix_sec)

    unix_times_sec = []
    list_of_predicted_region_tables = []
    num_times_done = 0
    narr_mask_matrix = None

    for i in range(len(possible_times_unix_sec)):
        if num_times_done == num_times:
            break

        this_prediction_file_name = ml_utils.find_gridded_prediction_file(
            directory_name=input_prediction_dir_name,
            first_target_time_unix_sec=possible_times_unix_sec[i],
            last_target_time_unix_sec=possible_times_unix_sec[i],
            raise_error_if_missing=False)
        if not os.path.isfile(this_prediction_file_name):
            continue

        num_times_done += 1
        unix_times_sec.append(possible_times_unix_sec[i])

        print 'Reading data from: "{0:s}"...'.format(this_prediction_file_name)
        this_prediction_dict = ml_utils.read_gridded_predictions(
            this_prediction_file_name)

        class_probability_matrix = this_prediction_dict[
            ml_utils.PROBABILITY_MATRIX_KEY]

        if narr_mask_matrix is None:
            narr_mask_matrix = numpy.invert(
                numpy.isnan(class_probability_matrix[0, ..., 0])).astype(int)

        # TODO(thunderhoser): This should be a separate method.
        # Where probabilities are NaN (grid cells outside the mask), set the
        # no-front probability to 1 and all remaining probabilities to 0.
        no_front_prob_matrix = class_probability_matrix[
            ..., front_utils.NO_FRONT_INTEGER_ID]
        no_front_prob_matrix[numpy.isnan(no_front_prob_matrix)] = 1.
        class_probability_matrix[numpy.isnan(class_probability_matrix)] = 0.

        print 'Determinizing probabilities...'
        this_predicted_label_matrix = object_eval.determinize_probabilities(
            class_probability_matrix=this_prediction_dict[
                ml_utils.PROBABILITY_MATRIX_KEY],
            binarization_threshold=binarization_threshold)

        print 'Converting image to frontal regions...'
        list_of_predicted_region_tables.append(
            object_eval.images_to_regions(
                predicted_label_matrix=this_predicted_label_matrix,
                image_times_unix_sec=possible_times_unix_sec[[i]]))

        print 'Throwing out frontal regions with area < {0:f} km^2...'.format(
            METRES2_TO_KM2 * min_object_area_metres2)
        list_of_predicted_region_tables[-1] = (
            object_eval.discard_regions_with_small_area(
                predicted_region_table=list_of_predicted_region_tables[-1],
                x_grid_spacing_metres=grid_spacing_metres,
                y_grid_spacing_metres=grid_spacing_metres,
                min_area_metres2=min_object_area_metres2))

        print 'Skeletonizing frontal regions...'
        list_of_predicted_region_tables[-1] = (
            object_eval.skeletonize_frontal_regions(
                predicted_region_table=list_of_predicted_region_tables[-1],
                num_grid_rows=num_grid_rows,
                num_grid_columns=num_grid_columns))

        list_of_predicted_region_tables[-1] = object_eval.find_main_skeletons(
            predicted_region_table=list_of_predicted_region_tables[-1],
            image_times_unix_sec=possible_times_unix_sec[[i]],
            num_grid_rows=num_grid_rows,
            num_grid_columns=num_grid_columns,
            x_grid_spacing_metres=grid_spacing_metres,
            y_grid_spacing_metres=grid_spacing_metres,
            min_endpoint_length_metres=min_endpoint_length_metres)

        if num_times_done != num_times:
            print '\n'

        if len(list_of_predicted_region_tables) == 1:
            continue

        list_of_predicted_region_tables[-1] = (
            list_of_predicted_region_tables[-1].align(
                list_of_predicted_region_tables[0], axis=1)[0])

    print SEPARATOR_STRING

    unix_times_sec = numpy.array(unix_times_sec, dtype=int)
    predicted_region_table = pandas.concat(list_of_predicted_region_tables,
                                           axis=0,
                                           ignore_index=True)
    predicted_region_table = object_eval.convert_regions_rowcol_to_narr_xy(
        predicted_region_table=predicted_region_table,
        are_predictions_from_fcn=False)

    actual_polyline_table = _read_actual_polylines(
        top_input_dir_name=top_front_line_dir_name,
        unix_times_sec=unix_times_sec,
        narr_mask_matrix=narr_mask_matrix)
    print SEPARATOR_STRING

    actual_polyline_table = object_eval.project_polylines_latlng_to_narr(
        actual_polyline_table)

    print 'Writing predicted and observed objects to: "{0:s}"...'.format(
        output_file_name)
    object_eval.write_predictions_and_obs(
        predicted_region_table=predicted_region_table,
        actual_polyline_table=actual_polyline_table,
        pickle_file_name=output_file_name)
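A sketch of the determinization idea above, under the assumption that object_eval.determinize_probabilities labels a grid cell "no front" (class 0) when its no-front probability meets the binarization threshold and otherwise assigns the likelier front class (1 = warm, 2 = cold, by assumption):

import numpy

class_probability_matrix = numpy.array([
    [[0.95, 0.03, 0.02], [0.40, 0.50, 0.10]],
    [[0.60, 0.10, 0.30], [0.20, 0.30, 0.50]]
])
binarization_threshold = 0.9

# Best front class at each cell, ignoring the no-front class.
front_label_matrix = 1 + numpy.argmax(
    class_probability_matrix[..., 1:], axis=-1)
predicted_label_matrix = numpy.where(
    class_probability_matrix[..., 0] >= binarization_threshold,
    0, front_label_matrix)
print(predicted_label_matrix)  # [[0 1], [2 2]]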
def _run(top_input_dir_name, first_time_string, last_time_string,
         pressure_level_mb, top_output_dir_name):
    """Computes theta_w (wet-bulb potential temperature) for NARR data.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param pressure_level_mb: Same.
    :param top_output_dir_name: Same.
    """

    if pressure_level_mb <= 0:
        pressure_level_mb = None

    if pressure_level_mb is None:
        pressure_in_file_name_mb = DUMMY_PRESSURE_LEVEL_MB + 0
    else:
        pressure_in_file_name_mb = pressure_level_mb + 0

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS)

    num_times = len(valid_times_unix_sec)
    this_pressure_matrix_pascals = None

    for i in range(num_times):
        this_temperature_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_input_dir_name,
            field_name=processed_narr_io.TEMPERATURE_NAME,
            pressure_level_mb=pressure_in_file_name_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print 'Reading data from: "{0:s}"...'.format(
            this_temperature_file_name)
        this_temperature_matrix_kelvins = (
            processed_narr_io.read_fields_from_file(
                this_temperature_file_name)[0])

        this_humidity_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_input_dir_name,
            field_name=processed_narr_io.SPECIFIC_HUMIDITY_NAME,
            pressure_level_mb=pressure_in_file_name_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print 'Reading data from: "{0:s}"...'.format(this_humidity_file_name)
        this_humidity_matrix_kg_kg01 = (
            processed_narr_io.read_fields_from_file(this_humidity_file_name)[0]
        )

        if pressure_level_mb is None:
            this_pressure_file_name = processed_narr_io.find_file_for_one_time(
                top_directory_name=top_input_dir_name,
                field_name=processed_narr_io.HEIGHT_NAME,
                pressure_level_mb=pressure_in_file_name_mb,
                valid_time_unix_sec=valid_times_unix_sec[i])

            print 'Reading data from: "{0:s}"...'.format(
                this_pressure_file_name)

            this_pressure_matrix_pascals = (
                processed_narr_io.read_fields_from_file(
                    this_pressure_file_name)[0])
        else:
            if this_pressure_matrix_pascals is None:
                this_pressure_matrix_pascals = numpy.full(
                    this_humidity_matrix_kg_kg01.shape,
                    pressure_level_mb * MB_TO_PASCALS)

        this_dewpoint_matrix_kelvins = (
            moisture_conversions.specific_humidity_to_dewpoint(
                specific_humidities_kg_kg01=this_humidity_matrix_kg_kg01,
                total_pressures_pascals=this_pressure_matrix_pascals))

        this_wb_temp_matrix_kelvins = (
            ge_conversions.dewpoint_to_wet_bulb_temperature(
                dewpoints_kelvins=this_dewpoint_matrix_kelvins,
                temperatures_kelvins=this_temperature_matrix_kelvins,
                total_pressures_pascals=this_pressure_matrix_pascals))

        this_theta_w_matrix_kelvins = (
            temperature_conversions.temperatures_to_potential_temperatures(
                temperatures_kelvins=this_wb_temp_matrix_kelvins,
                total_pressures_pascals=this_pressure_matrix_pascals))

        this_theta_w_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_output_dir_name,
            field_name=processed_narr_io.WET_BULB_THETA_NAME,
            pressure_level_mb=pressure_in_file_name_mb,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print 'Writing theta_w field to: "{0:s}"...\n'.format(
            this_theta_w_file_name)

        processed_narr_io.write_fields_to_file(
            pickle_file_name=this_theta_w_file_name,
            field_matrix=this_theta_w_matrix_kelvins,
            field_name=processed_narr_io.WET_BULB_THETA_NAME,
            pressure_level_pascals=pressure_in_file_name_mb * MB_TO_PASCALS,
            valid_times_unix_sec=valid_times_unix_sec[[i]])
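The final step above is just Poisson's equation applied to the wet-bulb temperature.  A hedged sketch with standard dry-air constants (the library call may differ in its constants and details):

import numpy

REFERENCE_PRESSURE_PASCALS = 1e5
KAPPA = 0.2854  # R_d / c_pd for dry air

def temperatures_to_potential_temperatures(temperatures_kelvins,
                                           total_pressures_pascals):
    # theta = T * (p_0 / p) ** kappa
    return temperatures_kelvins * (
        REFERENCE_PRESSURE_PASCALS / total_pressures_pascals) ** KAPPA

print(temperatures_to_potential_temperatures(
    numpy.array([285.]), numpy.array([85000.])))  # ~[298.5] K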
Example #20
def full_size_examples_to_eval_pairs(
        model_object, first_target_time_unix_sec, last_target_time_unix_sec,
        num_target_times_to_sample, num_points_per_time,
        top_narr_directory_name, top_frontal_grid_dir_name,
        narr_predictor_names, pressure_level_mb, dilation_distance_metres,
        num_classes, predictor_time_step_offsets=None, num_lead_time_steps=None,
        isotonic_model_object_by_class=None):
    """Creates evaluation pairs from full-size 3-D or 4-D examples.

    P = number of evaluation pairs created by this method
    K = number of classes

    :param model_object: See documentation for
        `downsized_examples_to_eval_pairs`.
    :param first_target_time_unix_sec: Same.
    :param last_target_time_unix_sec: Same.
    :param num_target_times_to_sample: Same.
    :param num_points_per_time: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param narr_predictor_names: Same.
    :param pressure_level_mb: Same.
    :param dilation_distance_metres: Same.
    :param num_classes: Same.
    :param predictor_time_step_offsets: Same.
    :param num_lead_time_steps: Same.
    :param isotonic_model_object_by_class: Same.
    :return: class_probability_matrix: Same.
    :return: observed_labels: Same.
    """

    error_checking.assert_is_integer(num_target_times_to_sample)
    error_checking.assert_is_greater(num_target_times_to_sample, 0)
    error_checking.assert_is_integer(num_points_per_time)
    error_checking.assert_is_greater(num_points_per_time, 0)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    if predictor_time_step_offsets is None:
        num_dimensions_per_example = 3
    else:
        num_dimensions_per_example = 4

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_target_time_unix_sec,
        end_time_unix_sec=last_target_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS, include_endpoint=True)

    numpy.random.shuffle(target_times_unix_sec)
    target_times_unix_sec = target_times_unix_sec[:num_target_times_to_sample]
    target_time_strings = [
        time_conversion.unix_sec_to_string(t, TIME_FORMAT_FOR_LOG_MESSAGES)
        for t in target_times_unix_sec]

    class_probability_matrix = numpy.full(
        (num_target_times_to_sample, num_points_per_time, num_classes),
        numpy.nan)
    observed_labels = numpy.full(
        (num_target_times_to_sample, num_points_per_time), -1, dtype=int)

    for i in range(num_target_times_to_sample):
        print 'Drawing evaluation pairs from {0:s}...'.format(
            target_time_strings[i])

        if num_dimensions_per_example == 3:
            this_class_probability_matrix, this_actual_target_matrix = (
                fcn.apply_model_to_3d_example(
                    model_object=model_object,
                    target_time_unix_sec=target_times_unix_sec[i],
                    top_narr_directory_name=top_narr_directory_name,
                    top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                    narr_predictor_names=narr_predictor_names,
                    pressure_level_mb=pressure_level_mb,
                    dilation_distance_metres=dilation_distance_metres,
                    num_classes=num_classes,
                    isotonic_model_object_by_class=
                    isotonic_model_object_by_class))
        else:
            this_class_probability_matrix, this_actual_target_matrix = (
                fcn.apply_model_to_4d_example(
                    model_object=model_object,
                    target_time_unix_sec=target_times_unix_sec[i],
                    num_predictor_time_steps=predictor_time_step_offsets,
                    num_lead_time_steps=num_lead_time_steps,
                    top_narr_directory_name=top_narr_directory_name,
                    top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                    narr_predictor_names=narr_predictor_names,
                    pressure_level_mb=pressure_level_mb,
                    dilation_distance_metres=dilation_distance_metres,
                    num_classes=num_classes,
                    isotonic_model_object_by_class=
                    isotonic_model_object_by_class))

        these_row_indices, these_column_indices = _get_random_sample_points(
            num_points=num_points_per_time, for_downsized_examples=False)

        class_probability_matrix[i, ...] = this_class_probability_matrix[
            0, these_row_indices, these_column_indices, ...]
        this_actual_target_matrix = this_actual_target_matrix[
            0, these_row_indices, these_column_indices]
        observed_labels[i, :] = numpy.reshape(
            this_actual_target_matrix, this_actual_target_matrix.size)

    new_dimensions = (
        num_target_times_to_sample * num_points_per_time, num_classes)
    class_probability_matrix = numpy.reshape(
        class_probability_matrix, new_dimensions)
    observed_labels = numpy.reshape(observed_labels, observed_labels.size)

    return class_probability_matrix, observed_labels
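A self-contained sketch of the sampling-and-flattening logic above: probabilities and labels are gathered at P random grid points per target time, then stacked into T * P evaluation pairs (toy grid sizes; the model application and _get_random_sample_points are stubbed with plain numpy.random calls):

import numpy

T, P, K = 2, 3, 3
num_grid_rows, num_grid_columns = 10, 12
class_probability_matrix = numpy.full((T, P, K), numpy.nan)
observed_labels = numpy.full((T, P), -1, dtype=int)

for i in range(T):
    # Stand-ins for one model application and one call to
    # _get_random_sample_points.
    this_prob_grid = numpy.random.random((num_grid_rows, num_grid_columns, K))
    this_label_grid = numpy.random.randint(
        0, K, size=(num_grid_rows, num_grid_columns))
    these_rows = numpy.random.randint(0, num_grid_rows, size=P)
    these_columns = numpy.random.randint(0, num_grid_columns, size=P)

    class_probability_matrix[i, ...] = this_prob_grid[
        these_rows, these_columns, :]
    observed_labels[i, :] = this_label_grid[these_rows, these_columns]

class_probability_matrix = class_probability_matrix.reshape(T * P, K)
observed_labels = observed_labels.reshape(-1)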
def _process_wpc_bulletins(first_time_string, last_time_string,
                           top_bulletin_dir_name, top_polyline_dir_name,
                           top_frontal_grid_dir_name):
    """Turns warm/cold fronts from WPC bulletins into polylines and NARR grids.

    :param first_time_string: Time (format "yyyymmddHH").  This script turns
        warm/cold fronts into polylines and NARR grids for all 3-hour time steps
        from `first_time_string`...`last_time_string`.
    :param last_time_string: See above.
    :param top_bulletin_dir_name: [input] Name of top-level directory with WPC
        bulletins.
    :param top_polyline_dir_name: [output] Name of top-level directory for
        Pickle files with frontal polylines.
    :param top_frontal_grid_dir_name: [output] Name of top-level directory for
        Pickle files with frontal grids.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS,
        include_endpoint=True)

    num_times = len(valid_times_unix_sec)
    for i in range(num_times):
        this_bulletin_file_name = wpc_bulletin_io.find_file(
            valid_time_unix_sec=valid_times_unix_sec[i],
            top_directory_name=top_bulletin_dir_name,
            raise_error_if_missing=False)

        if not os.path.isfile(this_bulletin_file_name):
            warning_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
                this_bulletin_file_name)
            warnings.warn(warning_string)
            continue

        print 'Reading data from: "{0:s}"...'.format(this_bulletin_file_name)
        this_polyline_table = wpc_bulletin_io.read_fronts_from_file(
            this_bulletin_file_name)

        this_polyline_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_polyline_dir_name,
            file_type=fronts_io.POLYLINE_FILE_TYPE,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print 'Writing polylines to file: "{0:s}"...'.format(
            this_polyline_file_name)
        fronts_io.write_polylines_to_file(
            front_table=this_polyline_table,
            pickle_file_name=this_polyline_file_name)

        print 'Converting polylines to NARR grids...'
        this_frontal_grid_table = front_utils.many_polylines_to_narr_grid(
            polyline_table=this_polyline_table,
            dilation_distance_metres=DILATION_DISTANCE_METRES)

        this_gridded_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_frontal_grid_dir_name,
            file_type=fronts_io.GRIDDED_FILE_TYPE,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print 'Writing NARR grids to file: "{0:s}"...\n'.format(
            this_gridded_file_name)
        fronts_io.write_narr_grids_to_file(
            frontal_grid_table=this_frontal_grid_table,
            pickle_file_name=this_gridded_file_name)
def _run(top_tracking_dir_name, first_spc_date_string, last_spc_date_string,
         colour_map_name, min_plot_latitude_deg, max_plot_latitude_deg,
         min_plot_longitude_deg, max_plot_longitude_deg, output_file_name):
    """Plots storm tracks for a continuous time period.

    This is effectively the main method.

    :param top_tracking_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param colour_map_name: Same.
    :param min_plot_latitude_deg: Same.
    :param max_plot_latitude_deg: Same.
    :param min_plot_longitude_deg: Same.
    :param max_plot_longitude_deg: Same.
    :param output_file_name: Same.
    """

    if colour_map_name in ['', 'None']:
        colour_map_object = 'random'
    else:
        colour_map_object = pyplot.cm.get_cmap(colour_map_name)

    if min_plot_latitude_deg <= SENTINEL_VALUE:
        min_plot_latitude_deg = None
    if max_plot_latitude_deg <= SENTINEL_VALUE:
        max_plot_latitude_deg = None
    if min_plot_longitude_deg <= SENTINEL_VALUE:
        min_plot_longitude_deg = None
    if max_plot_longitude_deg <= SENTINEL_VALUE:
        max_plot_longitude_deg = None

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    list_of_storm_object_tables = []

    for this_spc_date_string in spc_date_strings:
        these_file_names = tracking_io.find_files_one_spc_date(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=(
                echo_top_tracking.DUMMY_TRACKING_SCALE_METRES2),
            source_name=tracking_utils.SEGMOTION_NAME,
            spc_date_string=this_spc_date_string,
            raise_error_if_missing=False)[0]

        if len(these_file_names) == 0:
            continue

        this_storm_object_table = tracking_io.read_many_files(
            these_file_names)[REQUIRED_COLUMNS]

        list_of_storm_object_tables.append(this_storm_object_table)

        if this_spc_date_string != spc_date_strings[-1]:
            print(MINOR_SEPARATOR_STRING)

        if len(list_of_storm_object_tables) == 1:
            continue

        list_of_storm_object_tables[-1] = list_of_storm_object_tables[
            -1].align(list_of_storm_object_tables[0], axis=1)[0]

    print(SEPARATOR_STRING)
    storm_object_table = pandas.concat(list_of_storm_object_tables,
                                       axis=0,
                                       ignore_index=True)

    # TODO(thunderhoser): HACK
    first_time_unix_sec = time_conversion.string_to_unix_sec(
        '2011-04-27-20', '%Y-%m-%d-%H')
    storm_object_table = storm_object_table.loc[storm_object_table[
        tracking_utils.VALID_TIME_COLUMN] >= first_time_unix_sec]

    if min_plot_latitude_deg is None:
        min_plot_latitude_deg = numpy.min(
            storm_object_table[tracking_utils.CENTROID_LATITUDE_COLUMN].values
        ) - LATLNG_BUFFER_DEG

    if max_plot_latitude_deg is None:
        max_plot_latitude_deg = numpy.max(
            storm_object_table[tracking_utils.CENTROID_LATITUDE_COLUMN].values
        ) + LATLNG_BUFFER_DEG

    if min_plot_longitude_deg is None:
        min_plot_longitude_deg = numpy.min(
            storm_object_table[tracking_utils.CENTROID_LONGITUDE_COLUMN].values
        ) - LATLNG_BUFFER_DEG

    if max_plot_longitude_deg is None:
        max_plot_longitude_deg = numpy.max(
            storm_object_table[tracking_utils.CENTROID_LONGITUDE_COLUMN].values
        ) + LATLNG_BUFFER_DEG

    _, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=min_plot_latitude_deg,
            max_latitude_deg=max_plot_latitude_deg,
            min_longitude_deg=min_plot_longitude_deg,
            max_longitude_deg=max_plot_longitude_deg,
            resolution_string='i'))

    # plotting_utils.plot_coastlines(
    #     basemap_object=basemap_object, axes_object=axes_object,
    #     line_colour=BORDER_COLOUR
    # )
    plotting_utils.plot_countries(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  line_colour=BORDER_COLOUR)
    plotting_utils.plot_states_and_provinces(basemap_object=basemap_object,
                                             axes_object=axes_object,
                                             line_colour=BORDER_COLOUR)
    plotting_utils.plot_parallels(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  num_parallels=NUM_PARALLELS,
                                  line_colour=numpy.full(3, 1.))
    plotting_utils.plot_meridians(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  num_meridians=NUM_MERIDIANS,
                                  line_colour=numpy.full(3, 1.))

    colour_bar_object = storm_plotting.plot_storm_tracks(
        storm_object_table=storm_object_table,
        axes_object=axes_object,
        basemap_object=basemap_object,
        colour_map_object=colour_map_object)

    valid_times_unix_sec = (
        storm_object_table[tracking_utils.VALID_TIME_COLUMN].values)

    # TODO(thunderhoser): HACK
    tick_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=numpy.min(valid_times_unix_sec),
        end_time_unix_sec=numpy.max(valid_times_unix_sec),
        time_interval_sec=1800,
        include_endpoint=True)
    tick_time_strings = [
        time_conversion.unix_sec_to_string(t, COLOUR_BAR_TIME_FORMAT)
        for t in tick_times_unix_sec
    ]

    colour_bar_object.set_ticks(tick_times_unix_sec)
    colour_bar_object.set_ticklabels(tick_time_strings)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    pyplot.savefig(output_file_name,
                   dpi=FIGURE_RESOLUTION_DPI,
                   pad_inches=0,
                   bbox_inches='tight')
    pyplot.close()
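A hedged sketch of the colour-bar labelling trick above: tick positions are raw Unix times on the track colour scale, and the labels are the same times formatted as strings (standard library only; the colour_bar_object calls are as in the example, shown commented out):

import time
import numpy

valid_times_unix_sec = numpy.array([1303934400, 1303938000, 1303941600])
tick_times_unix_sec = numpy.arange(
    valid_times_unix_sec.min(), valid_times_unix_sec.max() + 1800, 1800)
tick_time_strings = [
    time.strftime('%H%M UTC', time.gmtime(t)) for t in tick_times_unix_sec
]
# colour_bar_object.set_ticks(tick_times_unix_sec)
# colour_bar_object.set_ticklabels(tick_time_strings)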
def _run(top_gridrad_dir_name, first_spc_date_string, last_spc_date_string,
         colour_map_name, grid_spacing_metres, output_file_name):
    """Plots GridRad domains.

    This is effectively the main method.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param colour_map_name: Same.
    :param grid_spacing_metres: Same.
    :param output_file_name: Same.
    """

    colour_map_object = pyplot.get_cmap(colour_map_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC,
        include_endpoint=True)

    valid_spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in valid_times_unix_sec
    ]

    domain_min_latitudes_deg = []
    domain_max_latitudes_deg = []
    domain_min_longitudes_deg = []
    domain_max_longitudes_deg = []

    prev_domain_limits_deg = numpy.full(4, numpy.nan)
    prev_spc_date_string = 'foo'
    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        this_gridrad_file_name = gridrad_io.find_file(
            unix_time_sec=valid_times_unix_sec[i],
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False)

        if not os.path.isfile(this_gridrad_file_name):
            continue

        these_domain_limits_deg = _get_domain_one_file(this_gridrad_file_name)
        same_domain = (valid_spc_date_strings[i] == prev_spc_date_string
                       and numpy.allclose(these_domain_limits_deg,
                                          prev_domain_limits_deg, TOLERANCE))

        if same_domain:
            continue

        prev_domain_limits_deg = these_domain_limits_deg + 0.
        prev_spc_date_string = valid_spc_date_strings[i]

        domain_min_latitudes_deg.append(these_domain_limits_deg[0])
        domain_max_latitudes_deg.append(these_domain_limits_deg[1])
        domain_min_longitudes_deg.append(these_domain_limits_deg[2])
        domain_max_longitudes_deg.append(these_domain_limits_deg[3])

    print(SEPARATOR_STRING)

    domain_min_latitudes_deg = numpy.array(domain_min_latitudes_deg)
    domain_max_latitudes_deg = numpy.array(domain_max_latitudes_deg)
    domain_min_longitudes_deg = numpy.array(domain_min_longitudes_deg)
    domain_max_longitudes_deg = numpy.array(domain_max_longitudes_deg)
    num_domains = len(domain_min_latitudes_deg)

    grid_metadata_dict = grids.create_equidistant_grid(
        min_latitude_deg=OVERALL_MIN_LATITUDE_DEG,
        max_latitude_deg=OVERALL_MAX_LATITUDE_DEG,
        min_longitude_deg=OVERALL_MIN_LONGITUDE_DEG,
        max_longitude_deg=OVERALL_MAX_LONGITUDE_DEG,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres,
        azimuthal=False)

    unique_x_coords_metres = grid_metadata_dict[grids.X_COORDS_KEY]
    unique_y_coords_metres = grid_metadata_dict[grids.Y_COORDS_KEY]
    projection_object = grid_metadata_dict[grids.PROJECTION_KEY]

    x_coord_matrix_metres, y_coord_matrix_metres = grids.xy_vectors_to_matrices(
        x_unique_metres=unique_x_coords_metres,
        y_unique_metres=unique_y_coords_metres)

    latitude_matrix_deg, longitude_matrix_deg = (
        projections.project_xy_to_latlng(x_coords_metres=x_coord_matrix_metres,
                                         y_coords_metres=y_coord_matrix_metres,
                                         projection_object=projection_object))

    num_grid_rows = latitude_matrix_deg.shape[0]
    num_grid_columns = latitude_matrix_deg.shape[1]
    num_days_matrix = numpy.full((num_grid_rows, num_grid_columns), 0)

    for i in range(num_domains):
        if numpy.mod(i, 10) == 0:
            print('Have found grid points in {0:d} of {1:d} domains...'.format(
                i, num_domains))

        this_lat_flag_matrix = numpy.logical_and(
            latitude_matrix_deg >= domain_min_latitudes_deg[i],
            latitude_matrix_deg <= domain_max_latitudes_deg[i])
        this_lng_flag_matrix = numpy.logical_and(
            longitude_matrix_deg >= domain_min_longitudes_deg[i],
            longitude_matrix_deg <= domain_max_longitudes_deg[i])

        num_days_matrix += numpy.logical_and(this_lat_flag_matrix,
                                             this_lng_flag_matrix).astype(int)

    print(SEPARATOR_STRING)

    figure_object, axes_object = _plot_data(
        num_days_matrix=num_days_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=colour_map_object)

    plotting_utils.label_axes(axes_object=axes_object, label_string='(c)')

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)
def _run(top_narr_dir_name, top_front_line_dir_name, top_wpc_bulletin_dir_name,
         first_time_string, last_time_string, pressure_level_mb,
         thermal_field_name, thermal_colour_map_name,
         max_thermal_prctile_for_colours, first_letter_label, letter_interval,
         output_dir_name):
    """Plots predictors on full NARR grid.

    This is effectively the main method.

    :param top_narr_dir_name: See documentation at top of file.
    :param top_front_line_dir_name: Same.
    :param top_wpc_bulletin_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param pressure_level_mb: Same.
    :param thermal_field_name: Same.
    :param thermal_colour_map_name: Same.
    :param max_thermal_prctile_for_colours: Same.
    :param first_letter_label: Same.
    :param letter_interval: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if
        `thermal_field_name not in VALID_THERMAL_FIELD_NAMES`.
    """

    # Check input args.
    if top_wpc_bulletin_dir_name in ['', 'None']:
        top_wpc_bulletin_dir_name = None

    if first_letter_label in ['', 'None']:
        first_letter_label = None

    if thermal_field_name not in VALID_THERMAL_FIELD_NAMES:
        error_string = (
            '\n{0:s}\nValid thermal fields (listed above) do not include '
            '"{1:s}".'
        ).format(str(VALID_THERMAL_FIELD_NAMES), thermal_field_name)

        raise ValueError(error_string)

    thermal_colour_map_object = pyplot.cm.get_cmap(thermal_colour_map_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, DEFAULT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, DEFAULT_TIME_FORMAT)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

    # Read metadata for NARR grid.
    narr_latitude_matrix_deg, narr_longitude_matrix_deg = (
        nwp_model_utils.get_latlng_grid_point_matrices(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    narr_rotation_cos_matrix, narr_rotation_sin_matrix = (
        nwp_model_utils.get_wind_rotation_angles(
            latitudes_deg=narr_latitude_matrix_deg,
            longitudes_deg=narr_longitude_matrix_deg,
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    narr_row_limits, narr_column_limits = (
        nwp_plotting.latlng_limits_to_rowcol_limits(
            min_latitude_deg=MIN_LATITUDE_DEG,
            max_latitude_deg=MAX_LATITUDE_DEG,
            min_longitude_deg=MIN_LONGITUDE_DEG,
            max_longitude_deg=MAX_LONGITUDE_DEG,
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    narr_rotation_cos_matrix = narr_rotation_cos_matrix[
        narr_row_limits[0]:(narr_row_limits[1] + 1),
        narr_column_limits[0]:(narr_column_limits[1] + 1)
    ]

    narr_rotation_sin_matrix = narr_rotation_sin_matrix[
        narr_row_limits[0]:(narr_row_limits[1] + 1),
        narr_column_limits[0]:(narr_column_limits[1] + 1)
    ]

    # Do plotting.
    narr_field_names = [
        processed_narr_io.U_WIND_GRID_RELATIVE_NAME,
        processed_narr_io.V_WIND_GRID_RELATIVE_NAME,
        thermal_field_name
    ]

    this_letter_label = None

    for this_time_unix_sec in valid_times_unix_sec:
        this_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_front_line_dir_name,
            file_type=fronts_io.POLYLINE_FILE_TYPE,
            valid_time_unix_sec=this_time_unix_sec)

        print 'Reading data from: "{0:s}"...'.format(this_file_name)
        this_polyline_table = fronts_io.read_polylines_from_file(this_file_name)

        if top_wpc_bulletin_dir_name is None:
            this_high_low_table = None
        else:
            this_file_name = wpc_bulletin_io.find_file(
                top_directory_name=top_wpc_bulletin_dir_name,
                valid_time_unix_sec=this_time_unix_sec)

            print 'Reading data from: "{0:s}"...'.format(this_file_name)
            this_high_low_table = wpc_bulletin_io.read_highs_and_lows(
                this_file_name)

        this_predictor_matrix = None

        for this_field_name in narr_field_names:
            this_file_name = processed_narr_io.find_file_for_one_time(
                top_directory_name=top_narr_dir_name,
                field_name=this_field_name,
                pressure_level_mb=pressure_level_mb,
                valid_time_unix_sec=this_time_unix_sec)

            print 'Reading data from: "{0:s}"...'.format(this_file_name)
            this_field_matrix = processed_narr_io.read_fields_from_file(
                this_file_name
            )[0][0, ...]

            this_field_matrix = utils.fill_nans(this_field_matrix)
            this_field_matrix = this_field_matrix[
                narr_row_limits[0]:(narr_row_limits[1] + 1),
                narr_column_limits[0]:(narr_column_limits[1] + 1)
            ]

            if this_field_name in [processed_narr_io.TEMPERATURE_NAME,
                                   processed_narr_io.WET_BULB_THETA_NAME]:
                this_field_matrix -= ZERO_CELSIUS_IN_KELVINS

            if this_field_name == processed_narr_io.SPECIFIC_HUMIDITY_NAME:
                this_field_matrix = this_field_matrix * KG_TO_GRAMS

            this_field_matrix = numpy.expand_dims(this_field_matrix, axis=-1)

            if this_predictor_matrix is None:
                this_predictor_matrix = this_field_matrix + 0.
            else:
                this_predictor_matrix = numpy.concatenate(
                    (this_predictor_matrix, this_field_matrix), axis=-1)

        u_wind_index = narr_field_names.index(
            processed_narr_io.U_WIND_GRID_RELATIVE_NAME)
        v_wind_index = narr_field_names.index(
            processed_narr_io.V_WIND_GRID_RELATIVE_NAME)

        (this_predictor_matrix[..., u_wind_index],
         this_predictor_matrix[..., v_wind_index]
        ) = nwp_model_utils.rotate_winds_to_earth_relative(
            u_winds_grid_relative_m_s01=this_predictor_matrix[
                ..., u_wind_index],
            v_winds_grid_relative_m_s01=this_predictor_matrix[
                ..., v_wind_index],
            rotation_angle_cosines=narr_rotation_cos_matrix,
            rotation_angle_sines=narr_rotation_sin_matrix)

        this_title_string = time_conversion.unix_sec_to_string(
            this_time_unix_sec, NICE_TIME_FORMAT)

        if pressure_level_mb == 1013:
            this_title_string += ' at surface'
        else:
            this_title_string += ' at {0:d} mb'.format(pressure_level_mb)

        this_default_time_string = time_conversion.unix_sec_to_string(
            this_time_unix_sec, DEFAULT_TIME_FORMAT)

        this_output_file_name = '{0:s}/predictors_{1:s}.jpg'.format(
            output_dir_name, this_default_time_string)

        if first_letter_label is not None:
            if this_letter_label is None:
                this_letter_label = first_letter_label
            else:
                this_letter_label = chr(
                    ord(this_letter_label) + letter_interval
                )

        _plot_one_time(
            predictor_matrix=this_predictor_matrix,
            predictor_names=narr_field_names,
            front_polyline_table=this_polyline_table,
            high_low_table=this_high_low_table,
            thermal_colour_map_object=thermal_colour_map_object,
            max_thermal_prctile_for_colours=max_thermal_prctile_for_colours,
            narr_row_limits=narr_row_limits,
            narr_column_limits=narr_column_limits,
            title_string=this_title_string, letter_label=this_letter_label,
            output_file_name=this_output_file_name)

        print '\n'
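A sketch of the wind-rotation step, using one common convention for converting grid-relative to earth-relative winds (the library's exact sign convention may differ):

import numpy

def rotate_winds_to_earth_relative(u_grid, v_grid, cosines, sines):
    # Rotate each wind vector by the local grid-rotation angle.
    u_earth = cosines * u_grid + sines * v_grid
    v_earth = cosines * v_grid - sines * u_grid
    return u_earth, v_earth

u_earth, v_earth = rotate_winds_to_earth_relative(
    numpy.array([5.]), numpy.array([0.]),
    cosines=numpy.array([0.9]),
    sines=numpy.array([numpy.sqrt(1. - 0.81)]))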
Example #25
def find_raw_file_inexact_time(desired_time_unix_sec,
                               spc_date_string,
                               field_name,
                               data_source,
                               top_directory_name,
                               height_m_asl=None,
                               max_time_offset_sec=None,
                               raise_error_if_missing=False):
    """Finds raw file at inexact time.

    If you know the exact valid time, use `find_raw_file`.

    :param desired_time_unix_sec: Desired valid time.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param field_name: Field name in GewitterGefahr format.
    :param data_source: Data source (string).
    :param top_directory_name: Name of top-level directory with raw files.
    :param height_m_asl: Radar height (metres above sea level).
    :param max_time_offset_sec: Maximum offset between actual and desired valid
        time.  For example, if `desired_time_unix_sec` is 162933 UTC 5 Jan 2018
        and `max_time_offset_sec` = 60, this method will look for files at
        valid times from 162833...163033 UTC 5 Jan 2018.  If None, this
        defaults to `DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC` for
        azimuthal-shear fields and `DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC`
        for all other fields.

    :param raise_error_if_missing: Boolean flag.  If no file is found and
        raise_error_if_missing = True, this method will error out.  If no file
        is found and raise_error_if_missing = False, will return None.
    :return: raw_file_name: Path to raw file.
    :raises: ValueError: if no file is found and raise_error_if_missing = True.
    """

    # Error-checking.
    error_checking.assert_is_integer(desired_time_unix_sec)
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_is_boolean(raise_error_if_missing)

    radar_utils.check_field_name(field_name)
    if max_time_offset_sec is None:
        if field_name in AZIMUTHAL_SHEAR_FIELD_NAMES:
            max_time_offset_sec = DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC
        else:
            max_time_offset_sec = DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC

    error_checking.assert_is_integer(max_time_offset_sec)
    error_checking.assert_is_greater(max_time_offset_sec, 0)

    first_allowed_minute_unix_sec = int(rounder.floor_to_nearest(
        float(desired_time_unix_sec - max_time_offset_sec),
        MINUTES_TO_SECONDS))
    last_allowed_minute_unix_sec = int(rounder.floor_to_nearest(
        float(desired_time_unix_sec + max_time_offset_sec),
        MINUTES_TO_SECONDS))

    allowed_minutes_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_allowed_minute_unix_sec,
        end_time_unix_sec=last_allowed_minute_unix_sec,
        time_interval_sec=MINUTES_TO_SECONDS,
        include_endpoint=True).astype(int)

    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name,
        data_source=data_source,
        height_m_asl=height_m_asl)

    raw_file_names = []
    for this_time_unix_sec in allowed_minutes_unix_sec:
        this_pathless_file_pattern = _get_pathless_raw_file_pattern(
            this_time_unix_sec)

        this_file_pattern = '{0:s}/{1:s}/{2:s}/{3:s}/{4:s}'.format(
            top_directory_name, spc_date_string[:4], spc_date_string,
            relative_directory_name, this_pathless_file_pattern)

        raw_file_names += glob.glob(this_file_pattern)

    file_times_unix_sec = []
    for this_raw_file_name in raw_file_names:
        file_times_unix_sec.append(raw_file_name_to_time(this_raw_file_name))

    if len(file_times_unix_sec):
        file_times_unix_sec = numpy.array(file_times_unix_sec)
        time_differences_sec = numpy.absolute(file_times_unix_sec -
                                              desired_time_unix_sec)
        nearest_index = numpy.argmin(time_differences_sec)
        min_time_diff_sec = time_differences_sec[nearest_index]
    else:
        min_time_diff_sec = numpy.inf

    if min_time_diff_sec > max_time_offset_sec:
        if raise_error_if_missing:
            desired_time_string = time_conversion.unix_sec_to_string(
                desired_time_unix_sec, TIME_FORMAT_FOR_LOG_MESSAGES)

            error_string = (
                'Could not find "{0:s}" file within {1:d} seconds of {2:s}.'
            ).format(field_name, max_time_offset_sec, desired_time_string)

            raise ValueError(error_string)

        return None

    return raw_file_names[nearest_index]
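The nearest-file selection above reduces to an argmin over absolute time differences, with an offset cap.  A self-contained sketch with toy times:

import numpy

file_times_unix_sec = numpy.array([100, 160, 220])
desired_time_unix_sec = 150
max_time_offset_sec = 30

time_differences_sec = numpy.absolute(
    file_times_unix_sec - desired_time_unix_sec)
nearest_index = numpy.argmin(time_differences_sec)

if time_differences_sec[nearest_index] > max_time_offset_sec:
    nearest_index = None  # treated as "no file found"
# Here the nearest time is 160 (10 s away), within the 30-s limit.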
Example #26
def find_polygon_files_for_spc_date(spc_date_unix_sec=None,
                                    top_raw_directory_name=None,
                                    tracking_scale_metres2=None,
                                    raise_error_if_missing=True):
    """Finds all polygon files for one SPC date.

    :param spc_date_unix_sec: SPC date.
    :param top_raw_directory_name: Name of top-level directory with raw
        segmotion files.
    :param tracking_scale_metres2: Tracking scale.
    :param raise_error_if_missing: If True and no files can be found, this
        method will raise an error.
    :return: polygon_file_names: 1-D list of paths to polygon files.
    """

    error_checking.assert_is_string(top_raw_directory_name)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    directory_name = '{0:s}/{1:s}'.format(
        top_raw_directory_name,
        _get_relative_polygon_dir_physical_scale(spc_date_string,
                                                 tracking_scale_metres2))

    first_hour_unix_sec = SPC_DATE_START_HOUR * HOURS_TO_SECONDS + (
        time_conversion.string_to_unix_sec(spc_date_string,
                                           time_conversion.SPC_DATE_FORMAT))
    last_hour_unix_sec = SPC_DATE_END_HOUR * HOURS_TO_SECONDS + (
        time_conversion.string_to_unix_sec(spc_date_string,
                                           time_conversion.SPC_DATE_FORMAT))
    hours_in_spc_date_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_hour_unix_sec,
        end_time_unix_sec=last_hour_unix_sec,
        time_interval_sec=HOURS_TO_SECONDS,
        include_endpoint=True)

    polygon_file_names = []
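    # For each hour in the SPC date, glob for zipped and unzipped polygon
    # files; an unzipped file is skipped if its zipped twin was already found.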
    for this_hour_unix_sec in hours_in_spc_date_unix_sec:
        this_time_string_seconds = time_conversion.unix_sec_to_string(
            this_hour_unix_sec, TIME_FORMAT_IN_FILES)
        this_time_string_hours = time_conversion.unix_sec_to_string(
            this_hour_unix_sec, TIME_FORMAT_IN_FILES_HOUR_ONLY) + '*'

        this_pathless_file_name_zipped = _get_pathless_polygon_file_name(
            this_hour_unix_sec, zipped=True)
        this_pathless_file_pattern_zipped = (
            this_pathless_file_name_zipped.replace(this_time_string_seconds,
                                                   this_time_string_hours))
        this_file_pattern_zipped = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_file_pattern_zipped)

        these_polygon_file_names_zipped = glob.glob(this_file_pattern_zipped)
        if these_polygon_file_names_zipped:
            polygon_file_names += these_polygon_file_names_zipped

        this_pathless_file_name_unzipped = _get_pathless_polygon_file_name(
            this_hour_unix_sec, zipped=False)
        this_pathless_file_pattern_unzipped = (
            this_pathless_file_name_unzipped.replace(this_time_string_seconds,
                                                     this_time_string_hours))
        this_file_pattern_unzipped = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_file_pattern_unzipped)

        these_polygon_file_names_unzipped = glob.glob(
            this_file_pattern_unzipped)
        for this_file_name_unzipped in these_polygon_file_names_unzipped:
            this_file_name_zipped = (this_file_name_unzipped +
                                     GZIP_FILE_EXTENSION)
            if this_file_name_zipped in polygon_file_names:
                continue

            polygon_file_names.append(this_file_name_unzipped)

    if raise_error_if_missing and not polygon_file_names:
        raise ValueError(
            'Cannot find any polygon files in directory: "{0:s}"'.format(
                directory_name))

    polygon_file_names.sort()
    return polygon_file_names
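A short usage sketch for find_polygon_files_for_spc_date (the directory and tracking scale below are hypothetical placeholders):

import calendar

# Any Unix time inside the convective day identifies the SPC date.
spc_date_unix_sec = calendar.timegm((2011, 4, 27, 18, 0, 0))

polygon_file_names = find_polygon_files_for_spc_date(
    spc_date_unix_sec=spc_date_unix_sec,
    top_raw_directory_name='/data/segmotion',  # hypothetical path
    tracking_scale_metres2=50000000,           # illustrative scale (m^2)
    raise_error_if_missing=False)

print('Found {0:d} polygon files.'.format(len(polygon_file_names)))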
Beispiel #27
0
def _run(first_time_string, last_time_string, max_num_examples_per_time,
         pressure_level_mb, narr_predictor_names, dilation_distance_metres,
         class_fractions, num_half_rows, num_half_columns,
         normalization_type_string, top_frontal_grid_dir_name,
         top_narr_directory_name, narr_mask_file_name, output_dir_name,
         num_times_per_output_file):
    """Writes downsized 3-D training examples to files.

    This is effectively the main method.

    :param first_time_string: See documentation at top of file.
    :param last_time_string: Same.
    :param max_num_examples_per_time: Same.
    :param pressure_level_mb: Same.
    :param narr_predictor_names: Same.
    :param dilation_distance_metres: Same.
    :param class_fractions: Same.
    :param num_half_rows: Same.
    :param num_half_columns: Same.
    :param normalization_type_string: Same.
    :param top_frontal_grid_dir_name: Same.
    :param top_narr_directory_name: Same.
    :param narr_mask_file_name: Same.
    :param output_dir_name: Same.
    :param num_times_per_output_file: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_STEP_SECONDS)

    if narr_mask_file_name == '':
        narr_mask_matrix = None
    else:
        print 'Reading NARR mask from: "{0:s}"...'.format(narr_mask_file_name)
        narr_mask_matrix = ml_utils.read_narr_mask(narr_mask_file_name)
        print SEPARATOR_STRING

    error_checking.assert_is_greater(num_times_per_output_file, 0)

    num_target_times = len(target_times_unix_sec)
    this_example_dict = None
    this_first_time_unix_sec = target_times_unix_sec[0]

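    # Accumulate examples in memory, flushing to a new output file after
    # every num_times_per_output_file target times.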
    for i in range(num_target_times):
        if numpy.mod(i, num_times_per_output_file) == 0 and i != 0:
            if this_example_dict is not None:
                this_last_time_unix_sec = target_times_unix_sec[i - 1]
                this_output_file_name = (
                    trainval_io.find_downsized_3d_example_file(
                        top_directory_name=output_dir_name,
                        first_target_time_unix_sec=this_first_time_unix_sec,
                        last_target_time_unix_sec=this_last_time_unix_sec,
                        raise_error_if_missing=False))

                print 'Writing data to file: "{0:s}"...'.format(
                    this_output_file_name)
                trainval_io.write_downsized_3d_examples(
                    netcdf_file_name=this_output_file_name,
                    example_dict=this_example_dict,
                    narr_predictor_names=narr_predictor_names,
                    pressure_level_mb=pressure_level_mb,
                    dilation_distance_metres=dilation_distance_metres,
                    narr_mask_matrix=narr_mask_matrix)

            print SEPARATOR_STRING
            this_example_dict = None
            this_first_time_unix_sec = target_times_unix_sec[i]

        this_new_example_dict = trainval_io.prep_downsized_3d_examples_to_write(
            target_time_unix_sec=target_times_unix_sec[i],
            max_num_examples=max_num_examples_per_time,
            top_narr_directory_name=top_narr_directory_name,
            top_frontal_grid_dir_name=top_frontal_grid_dir_name,
            narr_predictor_names=narr_predictor_names,
            pressure_level_mb=pressure_level_mb,
            dilation_distance_metres=dilation_distance_metres,
            class_fractions=class_fractions,
            num_rows_in_half_grid=num_half_rows,
            num_columns_in_half_grid=num_half_columns,
            normalization_type_string=normalization_type_string,
            narr_mask_matrix=narr_mask_matrix)

        print '\n'
        if this_new_example_dict is None:
            continue

        if this_example_dict is None:
            this_example_dict = copy.deepcopy(this_new_example_dict)
            continue

        for this_key in trainval_io.MAIN_KEYS:
            this_example_dict[this_key] = numpy.concatenate(
                (this_example_dict[this_key], this_new_example_dict[this_key]),
                axis=0)

    if this_example_dict is not None:
        this_last_time_unix_sec = target_times_unix_sec[-1]
        this_output_file_name = trainval_io.find_downsized_3d_example_file(
            top_directory_name=output_dir_name,
            first_target_time_unix_sec=this_first_time_unix_sec,
            last_target_time_unix_sec=this_last_time_unix_sec,
            raise_error_if_missing=False)

        print 'Writing data to file: "{0:s}"...'.format(this_output_file_name)
        trainval_io.write_downsized_3d_examples(
            netcdf_file_name=this_output_file_name,
            example_dict=this_example_dict,
            narr_predictor_names=narr_predictor_names,
            pressure_level_mb=pressure_level_mb,
            dilation_distance_metres=dilation_distance_metres,
            narr_mask_matrix=narr_mask_matrix)
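The accumulate-and-flush pattern in _run can be distilled into a self-contained sketch; the key names and toy array shapes here are illustrative, not the script's actual example-dict schema:

import numpy

MAIN_KEYS = ['predictor_matrix', 'target_matrix']  # illustrative keys

def _concat_example_dicts(first_dict, second_dict):
    """Concatenates two example dicts along the example axis (axis 0)."""
    return {
        this_key: numpy.concatenate(
            (first_dict[this_key], second_dict[this_key]), axis=0)
        for this_key in MAIN_KEYS
    }

# Toy batches of two examples each, on 3-by-3 grids.
first_dict = {this_key: numpy.zeros((2, 3, 3)) for this_key in MAIN_KEYS}
second_dict = {this_key: numpy.ones((2, 3, 3)) for this_key in MAIN_KEYS}

merged_dict = _concat_example_dicts(first_dict, second_dict)
print(merged_dict['predictor_matrix'].shape)  # (4, 3, 3)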