def _find_gridrad_file_for_date(top_gridrad_dir_name, spc_date_string):
    """Tries to find one GridRad file for given SPC date.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param spc_date_string: SPC date or convective day (format "yyyymmdd").
    :return: gridrad_file_name: Path to GridRad file. If no files were found
        for the given SPC date, returns None.
    """

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(spc_date_string)

    all_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True)

    for this_time_unix_sec in all_times_unix_sec:
        this_gridrad_file_name = gridrad_io.find_file(
            unix_time_sec=this_time_unix_sec,
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False)

        if os.path.isfile(this_gridrad_file_name):
            return this_gridrad_file_name

    return None
def _download_rap_analyses(first_init_time_string, last_init_time_string,
                           top_local_directory_name):
    """Downloads zero-hour analyses from the RAP (Rapid Refresh) model.

    :param first_init_time_string: See documentation at top of file.
    :param last_init_time_string: Same.
    :param top_local_directory_name: Same.
    """

    first_init_time_unix_sec = time_conversion.string_to_unix_sec(
        first_init_time_string, INPUT_TIME_FORMAT)
    last_init_time_unix_sec = time_conversion.string_to_unix_sec(
        last_init_time_string, INPUT_TIME_FORMAT)

    time_interval_sec = HOURS_TO_SECONDS * nwp_model_utils.get_time_steps(
        nwp_model_utils.RAP_MODEL_NAME)[1]

    init_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_init_time_unix_sec,
        end_time_unix_sec=last_init_time_unix_sec,
        time_interval_sec=time_interval_sec)

    init_time_strings = [
        time_conversion.unix_sec_to_string(t, DEFAULT_TIME_FORMAT)
        for t in init_times_unix_sec]

    num_init_times = len(init_times_unix_sec)
    local_file_names = [None] * num_init_times

    for i in range(num_init_times):
        local_file_names[i] = nwp_model_io.find_rap_file_any_grid(
            top_directory_name=top_local_directory_name,
            init_time_unix_sec=init_times_unix_sec[i], lead_time_hours=0,
            raise_error_if_missing=False)

        if local_file_names[i] is not None:
            continue

        local_file_names[i] = nwp_model_io.download_rap_file_any_grid(
            top_local_directory_name=top_local_directory_name,
            init_time_unix_sec=init_times_unix_sec[i], lead_time_hours=0,
            raise_error_if_fails=False)

        if local_file_names[i] is None:
            print('\nPROBLEM. Download failed for {0:s}.\n\n'.format(
                init_time_strings[i]))
        else:
            print('\nSUCCESS. File was downloaded to "{0:s}".\n\n'.format(
                local_file_names[i]))

        time.sleep(SECONDS_TO_PAUSE_BETWEEN_FILES)

    num_downloaded = numpy.sum(numpy.array(
        [f is not None for f in local_file_names]))
    print('{0:d} of {1:d} files were downloaded successfully!'.format(
        num_downloaded, num_init_times))
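# Illustrative call (a hypothetical sketch; the real script builds these
# arguments from the command line, and the time strings must match the
# module's INPUT_TIME_FORMAT, whose exact pattern is defined at top of file):
#
# _download_rap_analyses(
#     first_init_time_string=FIRST_INIT_TIME_STRING,
#     last_init_time_string=LAST_INIT_TIME_STRING,
#     top_local_directory_name='/data/rap_analyses')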
def test_range_and_interval_to_list_exclude_endpoint(self):
    """Ensures correct output from range_and_interval_to_list.

    In this case, endpoint of period is excluded from list of exact times.
    """

    these_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=ESTIMATED_START_TIME_UNIX_SEC,
        end_time_unix_sec=ESTIMATED_END_TIME_UNIX_SEC,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=False)

    self.assertTrue(numpy.array_equal(
        these_times_unix_sec, TIMES_WITHOUT_ENDPOINT_UNIX_SEC))
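# A hedged companion sketch (not from the original test suite): the same
# check with include_endpoint=True.  It assumes a module-level constant
# TIMES_WITH_ENDPOINT_UNIX_SEC holding the expected array, analogous to
# TIMES_WITHOUT_ENDPOINT_UNIX_SEC above.
def test_range_and_interval_to_list_include_endpoint(self):
    """Ensures correct output from range_and_interval_to_list.

    In this case, endpoint of period is included in list of exact times.
    """

    these_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=ESTIMATED_START_TIME_UNIX_SEC,
        end_time_unix_sec=ESTIMATED_END_TIME_UNIX_SEC,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True)

    self.assertTrue(numpy.array_equal(
        these_times_unix_sec, TIMES_WITH_ENDPOINT_UNIX_SEC))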
def find_processed_hourly_files(start_time_unix_sec=None,
                                end_time_unix_sec=None, primary_source=None,
                                secondary_source=None,
                                top_directory_name=None,
                                raise_error_if_missing=True):
    """Finds processed hourly wind files on local machine.

    N = number of hours in time period
        (start_time_unix_sec...end_time_unix_sec)

    :param start_time_unix_sec: Beginning of time period.
    :param end_time_unix_sec: End of time period.
    :param primary_source: String ID for primary data source.
    :param secondary_source: String ID for secondary data source.
    :param top_directory_name: Name of top-level directory with processed
        wind files.
    :param raise_error_if_missing: Boolean flag. If True and *any* file is
        missing, this method will raise an error.
    :return: processed_file_names: length-N list of paths to processed files.
    :return: hours_unix_sec: length-N numpy array of corresponding hours.
    """

    min_hour_unix_sec = int(
        rounder.floor_to_nearest(start_time_unix_sec, HOURS_TO_SECONDS))
    max_hour_unix_sec = int(
        rounder.floor_to_nearest(end_time_unix_sec, HOURS_TO_SECONDS))

    hours_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=min_hour_unix_sec,
        end_time_unix_sec=max_hour_unix_sec,
        time_interval_sec=HOURS_TO_SECONDS, include_endpoint=True)

    num_hours = len(hours_unix_sec)
    processed_file_names = [''] * num_hours

    for i in range(num_hours):
        processed_file_names[i] = find_processed_file(
            start_time_unix_sec=hours_unix_sec[i],
            end_time_unix_sec=hours_unix_sec[i] + HOURS_TO_SECONDS - 1,
            primary_source=primary_source, secondary_source=secondary_source,
            top_directory_name=top_directory_name,
            raise_error_if_missing=raise_error_if_missing)

    return processed_file_names, hours_unix_sec
def get_spc_dates_in_range(first_spc_date_string, last_spc_date_string):
    """Returns list of SPC dates in range.

    :param first_spc_date_string: First SPC date in range (format "yyyymmdd").
    :param last_spc_date_string: Last SPC date in range (format "yyyymmdd").
    :return: spc_date_strings: 1-D list of SPC dates (format "yyyymmdd").
    """

    first_spc_date_unix_sec = string_to_unix_sec(
        first_spc_date_string, SPC_DATE_FORMAT)
    last_spc_date_unix_sec = string_to_unix_sec(
        last_spc_date_string, SPC_DATE_FORMAT)
    error_checking.assert_is_geq(
        last_spc_date_unix_sec, first_spc_date_unix_sec)

    spc_dates_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_spc_date_unix_sec,
        end_time_unix_sec=last_spc_date_unix_sec,
        time_interval_sec=DAYS_TO_SECONDS, include_endpoint=True)

    return [unix_sec_to_string(t, SPC_DATE_FORMAT)
            for t in spc_dates_unix_sec]
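# Illustrative usage (added sketch).  SPC dates are convective days, so an
# inclusive range expands to one "yyyymmdd" string per day:
#
# >>> get_spc_dates_in_range('20110401', '20110403')
# ['20110401', '20110402', '20110403']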
def _run(input_dir_name, first_init_time_string, last_init_time_string,
         output_dir_name):
    """Combines forecasts from different initial times.

    This is effectively the main method.

    :param input_dir_name: See documentation at top of file.
    :param first_init_time_string: Same.
    :param last_init_time_string: Same.
    :param output_dir_name: Same.
    """

    first_init_time_unix_sec = time_conversion.string_to_unix_sec(
        first_init_time_string, INPUT_TIME_FORMAT)
    last_init_time_unix_sec = time_conversion.string_to_unix_sec(
        last_init_time_string, INPUT_TIME_FORMAT)

    init_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_init_time_unix_sec,
        end_time_unix_sec=last_init_time_unix_sec,
        time_interval_sec=MAX_LEAD_TIME_SECONDS, include_endpoint=True)

    probability_matrix = None
    gridded_forecast_dict = None

    for this_time_unix_sec in init_times_unix_sec:
        this_file_name = prediction_io.find_gridded_file(
            directory_name=input_dir_name,
            first_init_time_unix_sec=this_time_unix_sec,
            last_init_time_unix_sec=this_time_unix_sec,
            raise_error_if_missing=True)

        print('Reading data from: "{0:s}"...'.format(this_file_name))
        gridded_forecast_dict = prediction_io.read_gridded_predictions(
            this_file_name)

        assert (gridded_forecast_dict[prediction_io.MIN_LEAD_TIME_KEY] ==
                MIN_LEAD_TIME_SECONDS)
        assert (gridded_forecast_dict[prediction_io.MAX_LEAD_TIME_KEY] ==
                MAX_LEAD_TIME_SECONDS)

        this_probability_matrix = gridded_forecast_dict[
            prediction_io.XY_PROBABILITIES_KEY][0]

        if not isinstance(this_probability_matrix, numpy.ndarray):
            this_probability_matrix = this_probability_matrix.toarray()

        if probability_matrix is None:
            probability_matrix = this_probability_matrix + 0.
        else:
            probability_matrix = numpy.stack(
                (probability_matrix, this_probability_matrix), axis=-1)
            probability_matrix = numpy.nanmax(probability_matrix, axis=-1)

        print(probability_matrix.shape)
        print('\n')

    for this_key in prediction_io.LATLNG_KEYS:
        if this_key in gridded_forecast_dict:
            gridded_forecast_dict.pop(this_key)

    gridded_forecast_dict[prediction_io.INIT_TIMES_KEY] = (
        init_times_unix_sec[[0]])
    gridded_forecast_dict[prediction_io.MAX_LEAD_TIME_KEY] = (
        init_times_unix_sec[-1] + MAX_LEAD_TIME_SECONDS -
        init_times_unix_sec[0])
    gridded_forecast_dict[prediction_io.XY_PROBABILITIES_KEY] = (
        [probability_matrix])

    output_file_name = prediction_io.find_gridded_file(
        directory_name=output_dir_name,
        first_init_time_unix_sec=init_times_unix_sec[0],
        last_init_time_unix_sec=init_times_unix_sec[-1],
        raise_error_if_missing=False)

    print('Writing final grid to: "{0:s}"...'.format(output_file_name))
    prediction_io.write_gridded_predictions(
        gridded_forecast_dict=gridded_forecast_dict,
        pickle_file_name=output_file_name)
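# Why numpy.nanmax over axis=-1 (added note): stacking the running grid with
# the newest init time's grid and taking the elementwise max keeps, at each
# grid cell, the highest probability issued by any initialization.  A minimal
# sketch with toy 2x2 grids (illustrative only):
#
# >>> import numpy
# >>> a = numpy.array([[0.1, 0.9], [0.4, numpy.nan]])
# >>> b = numpy.array([[0.3, 0.2], [0.1, 0.5]])
# >>> numpy.nanmax(numpy.stack((a, b), axis=-1), axis=-1)
# array([[0.3, 0.9],
#        [0.4, 0.5]])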
def _run(first_time_string, last_time_string, randomize_times, num_times,
         thermal_field_name, smoothing_radius_pixels, warm_front_percentile,
         cold_front_percentile, num_closing_iters, pressure_level_mb,
         top_narr_directory_name, narr_mask_file_name, output_dir_name):
    """Uses NFA (numerical frontal analysis) to predict front type at each px.

    This is effectively the main method.

    :param first_time_string: See documentation at top of file.
    :param last_time_string: Same.
    :param randomize_times: Same.
    :param num_times: Same.
    :param thermal_field_name: Same.
    :param smoothing_radius_pixels: Same.
    :param warm_front_percentile: Same.
    :param cold_front_percentile: Same.
    :param num_closing_iters: Same.
    :param pressure_level_mb: Same.
    :param top_narr_directory_name: Same.
    :param narr_mask_file_name: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if
        `thermal_field_name not in VALID_THERMAL_FIELD_NAMES`.
    """

    if thermal_field_name not in VALID_THERMAL_FIELD_NAMES:
        error_string = (
            '\n{0:s}\nValid thermal fields (listed above) do not include '
            '"{1:s}".'
        ).format(str(VALID_THERMAL_FIELD_NAMES), thermal_field_name)

        raise ValueError(error_string)

    cutoff_radius_pixels = 4 * smoothing_radius_pixels

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

    if randomize_times:
        error_checking.assert_is_leq(num_times, len(valid_times_unix_sec))
        numpy.random.shuffle(valid_times_unix_sec)
        valid_times_unix_sec = valid_times_unix_sec[:num_times]

    if narr_mask_file_name == '':
        num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
        narr_mask_matrix = numpy.full(
            (num_grid_rows, num_grid_columns), 1, dtype=int)
    else:
        print('Reading mask from: "{0:s}"...\n'.format(narr_mask_file_name))
        narr_mask_matrix = ml_utils.read_narr_mask(narr_mask_file_name)

    x_spacing_metres, y_spacing_metres = nwp_model_utils.get_xy_grid_spacing(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        this_thermal_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_narr_directory_name,
            field_name=thermal_field_name,
            pressure_level_mb=pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print('Reading data from: "{0:s}"...'.format(this_thermal_file_name))
        this_thermal_matrix_kelvins = processed_narr_io.read_fields_from_file(
            this_thermal_file_name)[0][0, ...]

        this_thermal_matrix_kelvins = general_utils.fill_nans(
            this_thermal_matrix_kelvins)
        this_thermal_matrix_kelvins = nfa.gaussian_smooth_2d_field(
            field_matrix=this_thermal_matrix_kelvins,
            standard_deviation_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels)

        this_u_wind_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_narr_directory_name,
            field_name=processed_narr_io.U_WIND_GRID_RELATIVE_NAME,
            pressure_level_mb=pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print('Reading data from: "{0:s}"...'.format(this_u_wind_file_name))
        this_u_wind_matrix_m_s01 = processed_narr_io.read_fields_from_file(
            this_u_wind_file_name)[0][0, ...]

        this_u_wind_matrix_m_s01 = general_utils.fill_nans(
            this_u_wind_matrix_m_s01)
        this_u_wind_matrix_m_s01 = nfa.gaussian_smooth_2d_field(
            field_matrix=this_u_wind_matrix_m_s01,
            standard_deviation_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels)

        this_v_wind_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_narr_directory_name,
            field_name=processed_narr_io.V_WIND_GRID_RELATIVE_NAME,
            pressure_level_mb=pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print('Reading data from: "{0:s}"...'.format(this_v_wind_file_name))
        this_v_wind_matrix_m_s01 = processed_narr_io.read_fields_from_file(
            this_v_wind_file_name)[0][0, ...]

        this_v_wind_matrix_m_s01 = general_utils.fill_nans(
            this_v_wind_matrix_m_s01)
        this_v_wind_matrix_m_s01 = nfa.gaussian_smooth_2d_field(
            field_matrix=this_v_wind_matrix_m_s01,
            standard_deviation_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels)

        this_tfp_matrix_kelvins_m02 = nfa.get_thermal_front_param(
            thermal_field_matrix_kelvins=this_thermal_matrix_kelvins,
            x_spacing_metres=x_spacing_metres,
            y_spacing_metres=y_spacing_metres)
        this_tfp_matrix_kelvins_m02[narr_mask_matrix == 0] = 0.

        this_proj_velocity_matrix_m_s01 = nfa.project_wind_to_thermal_gradient(
            u_matrix_grid_relative_m_s01=this_u_wind_matrix_m_s01,
            v_matrix_grid_relative_m_s01=this_v_wind_matrix_m_s01,
            thermal_field_matrix_kelvins=this_thermal_matrix_kelvins,
            x_spacing_metres=x_spacing_metres,
            y_spacing_metres=y_spacing_metres)

        this_locating_var_matrix_m01_s01 = nfa.get_locating_variable(
            tfp_matrix_kelvins_m02=this_tfp_matrix_kelvins_m02,
            projected_velocity_matrix_m_s01=this_proj_velocity_matrix_m_s01)

        this_predicted_label_matrix = nfa.get_front_types(
            locating_var_matrix_m01_s01=this_locating_var_matrix_m01_s01,
            warm_front_percentile=warm_front_percentile,
            cold_front_percentile=cold_front_percentile)

        this_predicted_label_matrix = front_utils.close_frontal_image(
            ternary_image_matrix=this_predicted_label_matrix,
            num_iterations=num_closing_iters)

        this_prediction_file_name = nfa.find_prediction_file(
            directory_name=output_dir_name,
            first_valid_time_unix_sec=valid_times_unix_sec[i],
            last_valid_time_unix_sec=valid_times_unix_sec[i],
            ensembled=False, raise_error_if_missing=False)

        print('Writing gridded predictions to file: "{0:s}"...\n'.format(
            this_prediction_file_name))

        nfa.write_gridded_predictions(
            pickle_file_name=this_prediction_file_name,
            predicted_label_matrix=numpy.expand_dims(
                this_predicted_label_matrix, axis=0),
            valid_times_unix_sec=valid_times_unix_sec[[i]],
            narr_mask_matrix=narr_mask_matrix,
            pressure_level_mb=pressure_level_mb,
            smoothing_radius_pixels=smoothing_radius_pixels,
            cutoff_radius_pixels=cutoff_radius_pixels,
            warm_front_percentile=warm_front_percentile,
            cold_front_percentile=cold_front_percentile,
            num_closing_iters=num_closing_iters)
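# Background note (added; this is the standard definition, e.g. Renard and
# Clarke 1965, not a quote from this module): the thermal front parameter
# computed by nfa.get_thermal_front_param is
#     TFP = -grad(|grad(tau)|) . (grad(tau) / |grad(tau)|),
# where tau is the (smoothed) thermal field.  Its units are K m^-2, matching
# the variable name this_tfp_matrix_kelvins_m02 above.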
def _run_for_gridrad(spc_date_string, top_radar_dir_name, top_output_dir_name,
                     option_dict):
    """Runs echo classification for GridRad data.

    :param spc_date_string: See documentation at top of file.
    :param top_radar_dir_name: Same.
    :param top_output_dir_name: Same.
    :param option_dict: See doc for
        `echo_classification.find_convective_pixels`.
    """

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=time_conversion.get_start_of_spc_date(
            spc_date_string),
        end_time_unix_sec=time_conversion.get_end_of_spc_date(
            spc_date_string),
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True)

    num_times = len(valid_times_unix_sec)
    radar_file_names = [''] * num_times
    indices_to_keep = []

    for i in range(num_times):
        radar_file_names[i] = gridrad_io.find_file(
            top_directory_name=top_radar_dir_name,
            unix_time_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        if os.path.isfile(radar_file_names[i]):
            indices_to_keep.append(i)

    indices_to_keep = numpy.array(indices_to_keep, dtype=int)
    valid_times_unix_sec = valid_times_unix_sec[indices_to_keep]
    radar_file_names = [radar_file_names[k] for k in indices_to_keep]
    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        print('Reading data from: "{0:s}"...\n'.format(radar_file_names[i]))
        radar_metadata_dict = gridrad_io.read_metadata_from_full_grid_file(
            netcdf_file_name=radar_file_names[i])

        (reflectivity_matrix_dbz, all_heights_m_asl, grid_point_latitudes_deg,
         grid_point_longitudes_deg
        ) = gridrad_io.read_field_from_full_grid_file(
            netcdf_file_name=radar_file_names[i],
            field_name=radar_utils.REFL_NAME,
            metadata_dict=radar_metadata_dict)

        reflectivity_matrix_dbz = numpy.rollaxis(
            reflectivity_matrix_dbz, axis=0, start=3)

        height_indices = numpy.array(
            [all_heights_m_asl.tolist().index(h)
             for h in RADAR_HEIGHTS_M_ASL],
            dtype=int)
        reflectivity_matrix_dbz = reflectivity_matrix_dbz[..., height_indices]

        grid_metadata_dict = {
            echo_classifn.MIN_LATITUDE_KEY:
                numpy.min(grid_point_latitudes_deg),
            echo_classifn.LATITUDE_SPACING_KEY:
                grid_point_latitudes_deg[1] - grid_point_latitudes_deg[0],
            echo_classifn.MIN_LONGITUDE_KEY:
                numpy.min(grid_point_longitudes_deg),
            echo_classifn.LONGITUDE_SPACING_KEY:
                grid_point_longitudes_deg[1] - grid_point_longitudes_deg[0],
            echo_classifn.HEIGHTS_KEY: RADAR_HEIGHTS_M_ASL
        }

        convective_flag_matrix = echo_classifn.find_convective_pixels(
            reflectivity_matrix_dbz=reflectivity_matrix_dbz,
            grid_metadata_dict=grid_metadata_dict,
            valid_time_unix_sec=valid_times_unix_sec[i],
            option_dict=option_dict)

        print('Number of convective pixels = {0:d}\n'.format(
            numpy.sum(convective_flag_matrix)))

        this_output_file_name = echo_classifn.find_classification_file(
            top_directory_name=top_output_dir_name,
            valid_time_unix_sec=valid_times_unix_sec[i],
            desire_zipped=False, allow_zipped_or_unzipped=False,
            raise_error_if_missing=False)

        print('Writing echo classifications to: "{0:s}"...'.format(
            this_output_file_name))

        echo_classifn.write_classifications(
            convective_flag_matrix=convective_flag_matrix,
            grid_metadata_dict=grid_metadata_dict,
            valid_time_unix_sec=valid_times_unix_sec[i],
            option_dict=option_dict, netcdf_file_name=this_output_file_name)

        print(SEPARATOR_STRING)
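# Note on the axis shuffle above (added): the reflectivity grid is read with
# height as the leading axis, and numpy.rollaxis(matrix, axis=0, start=3)
# moves that axis to the end, so heights can then be subset with
# reflectivity_matrix_dbz[..., height_indices].  A minimal sketch with a
# made-up shape:
#
# >>> import numpy
# >>> numpy.rollaxis(numpy.empty((12, 100, 200)), axis=0, start=3).shape
# (100, 200, 12)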
def _run(model_file_name, first_time_string, last_time_string,
         randomize_times, num_target_times, use_isotonic_regression,
         top_narr_directory_name, top_frontal_grid_dir_name, output_dir_name):
    """Applies traditional CNN to full grids.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param randomize_times: Same.
    :param num_target_times: Same.
    :param use_isotonic_regression: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param output_dir_name: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

    if randomize_times:
        error_checking.assert_is_leq(
            num_target_times, len(target_times_unix_sec))
        numpy.random.shuffle(target_times_unix_sec)
        target_times_unix_sec = target_times_unix_sec[:num_target_times]

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    if use_isotonic_regression:
        isotonic_file_name = isotonic_regression.find_model_file(
            base_model_file_name=model_file_name, raise_error_if_missing=True)

        print('Reading isotonic-regression models from: "{0:s}"...'.format(
            isotonic_file_name))
        isotonic_model_object_by_class = (
            isotonic_regression.read_model_for_each_class(isotonic_file_name)
        )
    else:
        isotonic_model_object_by_class = None

    if model_metadata_dict[traditional_cnn.NUM_LEAD_TIME_STEPS_KEY] is None:
        num_dimensions = 3
    else:
        num_dimensions = 4

    num_classes = len(
        model_metadata_dict[traditional_cnn.CLASS_FRACTIONS_KEY])
    num_target_times = len(target_times_unix_sec)
    print(SEPARATOR_STRING)

    for i in range(num_target_times):
        if num_dimensions == 3:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_3d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])
        else:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_4d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                predictor_time_step_offsets=model_metadata_dict[
                    traditional_cnn.PREDICTOR_TIME_STEP_OFFSETS_KEY],
                num_lead_time_steps=model_metadata_dict[
                    traditional_cnn.NUM_LEAD_TIME_STEPS_KEY],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])

        this_target_matrix[this_target_matrix == -1] = 0
        print(MINOR_SEPARATOR_STRING)

        this_prediction_file_name = ml_utils.find_gridded_prediction_file(
            directory_name=output_dir_name,
            first_target_time_unix_sec=target_times_unix_sec[i],
            last_target_time_unix_sec=target_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing gridded predictions to file: "{0:s}"...'.format(
            this_prediction_file_name))

        ml_utils.write_gridded_predictions(
            pickle_file_name=this_prediction_file_name,
            class_probability_matrix=this_class_probability_matrix,
            target_times_unix_sec=target_times_unix_sec[[i]],
            model_file_name=model_file_name,
            used_isotonic_regression=use_isotonic_regression,
            target_matrix=this_target_matrix)

        if i != num_target_times - 1:
            print(SEPARATOR_STRING)
def _find_domain_for_date(top_gridrad_dir_name, spc_date_string):
    """Finds GridRad domain for the given SPC date.

    If no GridRad files are found for the given SPC date, this method returns
    None for all output variables.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param spc_date_string: SPC date or convective day (format "yyyymmdd").
    :return: domain_limits_deg: length-4 numpy array with [min latitude,
        max latitude, min longitude, max longitude].  Units are deg N for
        latitude, deg W for longitude.
    """

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(spc_date_string)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True
    )

    num_times = len(valid_times_unix_sec)
    min_latitudes_deg = numpy.full(num_times, numpy.nan)
    max_latitudes_deg = numpy.full(num_times, numpy.nan)
    min_longitudes_deg = numpy.full(num_times, numpy.nan)
    max_longitudes_deg = numpy.full(num_times, numpy.nan)

    for i in range(num_times):
        this_file_name = gridrad_io.find_file(
            unix_time_sec=valid_times_unix_sec[i],
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False
        )

        if not os.path.isfile(this_file_name):
            continue

        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_metadata_dict = gridrad_io.read_metadata_from_full_grid_file(
            this_file_name
        )

        max_latitudes_deg[i] = (
            this_metadata_dict[radar_utils.NW_GRID_POINT_LAT_COLUMN]
        )
        min_longitudes_deg[i] = (
            this_metadata_dict[radar_utils.NW_GRID_POINT_LNG_COLUMN]
        )

        max_longitudes_deg[i] = min_longitudes_deg[i] + (
            (this_metadata_dict[radar_utils.NUM_LNG_COLUMN] - 1) *
            this_metadata_dict[radar_utils.LNG_SPACING_COLUMN]
        )
        min_latitudes_deg[i] = max_latitudes_deg[i] - (
            (this_metadata_dict[radar_utils.NUM_LAT_COLUMN] - 1) *
            this_metadata_dict[radar_utils.LAT_SPACING_COLUMN]
        )

    good_indices = numpy.where(
        numpy.invert(numpy.isnan(min_latitudes_deg))
    )[0]

    if len(good_indices) == 0:
        return None

    coord_matrix = numpy.vstack((
        min_latitudes_deg[good_indices], max_latitudes_deg[good_indices],
        min_longitudes_deg[good_indices], max_longitudes_deg[good_indices]
    ))
    coord_matrix, num_instances_by_row = numpy.unique(
        numpy.transpose(coord_matrix), axis=0, return_counts=True
    )

    print(coord_matrix)
    print(num_instances_by_row)

    domain_limits_deg = coord_matrix[numpy.argmax(num_instances_by_row), :]
    domain_limits_deg[2:] = -1 * lng_conversion.convert_lng_negative_in_west(
        longitudes_deg=domain_limits_deg[2:], allow_nan=False
    )

    return domain_limits_deg
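# How the most common domain is chosen (added note): numpy.unique with
# axis=0 and return_counts=True collapses duplicate coordinate rows and
# counts them, so argmax(num_instances_by_row) picks the modal domain over
# the day.  A minimal sketch:
#
# >>> import numpy
# >>> m = numpy.array([[1, 2], [1, 2], [3, 4]])
# >>> rows, counts = numpy.unique(m, axis=0, return_counts=True)
# >>> rows[numpy.argmax(counts), :]
# array([1, 2])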
def _run(top_input_dir_name, first_time_string, last_time_string,
         input_field_name, pressure_level_mb, top_output_dir_name):
    """Converts NARR data to a more convenient file format.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param input_field_name: Same.
    :param pressure_level_mb: Same.
    :param top_output_dir_name: Same.
    """

    if pressure_level_mb <= 0:
        pressure_level_mb = None

    if pressure_level_mb is None:
        output_pressure_level_mb = DUMMY_PRESSURE_LEVEL_MB + 0
    else:
        output_pressure_level_mb = pressure_level_mb + 0

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS)

    if input_field_name == processed_narr_io.U_WIND_EARTH_RELATIVE_NAME:
        input_field_name_other = (
            processed_narr_io.V_WIND_EARTH_RELATIVE_NAME)
    elif input_field_name == processed_narr_io.V_WIND_EARTH_RELATIVE_NAME:
        input_field_name_other = (
            processed_narr_io.U_WIND_EARTH_RELATIVE_NAME)
    else:
        input_field_name_other = None

    input_field_name_grib1 = _std_to_grib1_field_name(
        field_name=input_field_name, pressure_level_mb=pressure_level_mb)

    if input_field_name in WIND_FIELD_NAMES:
        input_field_name_other_grib1 = _std_to_grib1_field_name(
            field_name=input_field_name_other,
            pressure_level_mb=pressure_level_mb)

        output_field_name = processed_narr_io.field_name_to_grid_relative(
            input_field_name)
        output_field_name_other = (
            processed_narr_io.field_name_to_grid_relative(
                input_field_name_other))

        (narr_latitude_matrix_deg, narr_longitude_matrix_deg
        ) = nwp_model_utils.get_latlng_grid_point_matrices(
            model_name=nwp_model_utils.NARR_MODEL_NAME)

        (narr_rotation_cosine_matrix, narr_rotation_sine_matrix
        ) = nwp_model_utils.get_wind_rotation_angles(
            latitudes_deg=narr_latitude_matrix_deg,
            longitudes_deg=narr_longitude_matrix_deg,
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    else:
        input_field_name_other_grib1 = None
        output_field_name = input_field_name + ''
        output_field_name_other = None

    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        if input_field_name in WIND_FIELD_NAMES:
            this_field_matrix_other = None

        if valid_times_unix_sec[i] > LAST_GRIB_TIME_UNIX_SEC:
            this_month_string = time_conversion.unix_sec_to_string(
                valid_times_unix_sec[i], MONTH_TIME_FORMAT)

            this_netcdf_file_name = narr_netcdf_io.find_file(
                top_directory_name=top_input_dir_name,
                field_name=input_field_name, month_string=this_month_string,
                is_surface=pressure_level_mb is None)

            print('Reading data from: "{0:s}"...'.format(
                this_netcdf_file_name))
            this_field_matrix = narr_netcdf_io.read_file(
                netcdf_file_name=this_netcdf_file_name,
                field_name=input_field_name,
                valid_time_unix_sec=valid_times_unix_sec[i],
                pressure_level_mb=pressure_level_mb)

            if input_field_name in WIND_FIELD_NAMES:
                this_netcdf_file_name_other = narr_netcdf_io.find_file(
                    top_directory_name=top_input_dir_name,
                    field_name=input_field_name_other,
                    month_string=this_month_string,
                    is_surface=pressure_level_mb is None)

                print('Reading data from: "{0:s}"...'.format(
                    this_netcdf_file_name_other))
                this_field_matrix_other = narr_netcdf_io.read_file(
                    netcdf_file_name=this_netcdf_file_name_other,
                    field_name=input_field_name_other,
                    valid_time_unix_sec=valid_times_unix_sec[i],
                    pressure_level_mb=pressure_level_mb)
        else:
            this_grib_file_name = nwp_model_io.find_grib_file(
                top_directory_name=top_input_dir_name,
                model_name=nwp_model_utils.NARR_MODEL_NAME,
                init_time_unix_sec=valid_times_unix_sec[i],
                lead_time_hours=0)

            print('Reading data from: "{0:s}"...'.format(
                this_grib_file_name))
            this_field_matrix = nwp_model_io.read_field_from_grib_file(
                grib_file_name=this_grib_file_name,
                field_name_grib1=input_field_name_grib1,
                model_name=nwp_model_utils.NARR_MODEL_NAME,
                wgrib_exe_name=WGRIB_EXE_NAME,
                wgrib2_exe_name=WGRIB2_EXE_NAME)

            if input_field_name in WIND_FIELD_NAMES:
                this_field_matrix_other = (
                    nwp_model_io.read_field_from_grib_file(
                        grib_file_name=this_grib_file_name,
                        field_name_grib1=input_field_name_other_grib1,
                        model_name=nwp_model_utils.NARR_MODEL_NAME,
                        wgrib_exe_name=WGRIB_EXE_NAME,
                        wgrib2_exe_name=WGRIB2_EXE_NAME))

        if input_field_name in WIND_FIELD_NAMES:
            print('Rotating Earth-relative winds to grid-relative...')

            if input_field_name == (
                    processed_narr_io.U_WIND_EARTH_RELATIVE_NAME):
                this_field_matrix, this_field_matrix_other = (
                    nwp_model_utils.rotate_winds_to_grid_relative(
                        u_winds_earth_relative_m_s01=this_field_matrix,
                        v_winds_earth_relative_m_s01=this_field_matrix_other,
                        rotation_angle_cosines=narr_rotation_cosine_matrix,
                        rotation_angle_sines=narr_rotation_sine_matrix))
            else:
                this_field_matrix_other, this_field_matrix = (
                    nwp_model_utils.rotate_winds_to_grid_relative(
                        u_winds_earth_relative_m_s01=this_field_matrix_other,
                        v_winds_earth_relative_m_s01=this_field_matrix,
                        rotation_angle_cosines=narr_rotation_cosine_matrix,
                        rotation_angle_sines=narr_rotation_sine_matrix))

        this_output_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_output_dir_name,
            field_name=output_field_name,
            pressure_level_mb=output_pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing processed data to: "{0:s}"...'.format(
            this_output_file_name))
        processed_narr_io.write_fields_to_file(
            pickle_file_name=this_output_file_name,
            field_matrix=numpy.expand_dims(this_field_matrix, axis=0),
            field_name=output_field_name,
            pressure_level_pascals=output_pressure_level_mb * MB_TO_PASCALS,
            valid_times_unix_sec=valid_times_unix_sec[[i]])

        if input_field_name not in WIND_FIELD_NAMES:
            print('\n')
            continue

        this_output_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_output_dir_name,
            field_name=output_field_name_other,
            pressure_level_mb=output_pressure_level_mb,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing processed data to: "{0:s}"...\n'.format(
            this_output_file_name))
        processed_narr_io.write_fields_to_file(
            pickle_file_name=this_output_file_name,
            field_matrix=numpy.expand_dims(this_field_matrix_other, axis=0),
            field_name=output_field_name_other,
            pressure_level_pascals=output_pressure_level_mb * MB_TO_PASCALS,
            valid_times_unix_sec=valid_times_unix_sec[[i]])
def _link_one_warning(warning_table, storm_object_table, max_distance_metres,
                      min_lifetime_fraction, test_mode=False):
    """Links one warning to nearest storm.

    :param warning_table: pandas DataFrame with one row and the following
        columns.
    warning_table.start_time_unix_sec: Start time.
    warning_table.end_time_unix_sec: End time.
    warning_table.polygon_object_latlng: Polygon (instance of
        `shapely.geometry.Polygon`) with lat-long coordinates of warning
        boundary.
    warning_table.polygon_object_xy: Polygon (instance of
        `shapely.geometry.Polygon`) with x-y coordinates of warning boundary.

    :param storm_object_table: pandas DataFrame returned by
        `storm_tracking_io.read_file`.
    :param max_distance_metres: See documentation at top of file.
    :param min_lifetime_fraction: Same.
    :param test_mode: Never mind. Just leave this alone.
    :return: secondary_id_strings: 1-D list of secondary IDs for storms to
        which warning is linked. If warning is not linked to a storm, this is
        empty.
    """

    warning_start_time_unix_sec = (
        warning_table[WARNING_START_TIME_KEY].values[0])
    warning_end_time_unix_sec = warning_table[WARNING_END_TIME_KEY].values[0]
    warning_polygon_object_xy = (
        warning_table[WARNING_XY_POLYGON_KEY].values[0])

    orig_num_storm_objects = len(storm_object_table.index)
    storm_object_table = linkage._filter_storms_by_time(
        storm_object_table=storm_object_table,
        max_start_time_unix_sec=warning_end_time_unix_sec + 720,
        min_end_time_unix_sec=warning_start_time_unix_sec - 720)

    num_storm_objects = len(storm_object_table.index)
    print('Filtering by time removed {0:d} of {1:d} storm objects.'.format(
        orig_num_storm_objects - num_storm_objects, orig_num_storm_objects))

    orig_num_storm_objects = num_storm_objects + 0
    storm_object_table = _remove_far_away_storms(
        warning_polygon_object_latlng=warning_table[
            WARNING_LATLNG_POLYGON_KEY].values[0],
        storm_object_table=storm_object_table)

    num_storm_objects = len(storm_object_table.index)
    print((
        'Filtering by distance removed {0:d} of {1:d} storm objects.'
    ).format(
        orig_num_storm_objects - num_storm_objects, orig_num_storm_objects))

    warning_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=warning_start_time_unix_sec,
        end_time_unix_sec=warning_end_time_unix_sec,
        time_interval_sec=1 if test_mode else 60, include_endpoint=True)

    unique_sec_id_strings = numpy.unique(
        storm_object_table[tracking_utils.SECONDARY_ID_COLUMN].values)

    num_sec_id_strings = len(unique_sec_id_strings)
    num_warning_times = len(warning_times_unix_sec)
    distance_matrix_metres = numpy.full(
        (num_sec_id_strings, num_warning_times), numpy.nan)

    for j in range(num_warning_times):
        this_interp_vertex_table = linkage._interp_storms_in_time(
            storm_object_table=storm_object_table,
            target_time_unix_sec=warning_times_unix_sec[j],
            max_time_before_start_sec=0 if test_mode else 180,
            max_time_after_end_sec=0 if test_mode else 180)

        for i in range(num_sec_id_strings):
            these_indices = numpy.where(
                this_interp_vertex_table[
                    tracking_utils.SECONDARY_ID_COLUMN].values ==
                unique_sec_id_strings[i]
            )[0]

            if len(these_indices) == 0:
                continue

            these_x_metres = this_interp_vertex_table[
                linkage.STORM_VERTEX_X_COLUMN].values[these_indices]
            these_y_metres = this_interp_vertex_table[
                linkage.STORM_VERTEX_Y_COLUMN].values[these_indices]

            distance_matrix_metres[i, j] = _find_one_centroid_distance(
                storm_x_vertices_metres=these_x_metres,
                storm_y_vertices_metres=these_y_metres,
                warning_polygon_object_xy=warning_polygon_object_xy)

    lifetime_fractions = (
        1. - numpy.mean(numpy.isnan(distance_matrix_metres), axis=1))
    bad_indices = numpy.where(lifetime_fractions < min_lifetime_fraction)[0]
    distance_matrix_metres[bad_indices, ...] = LARGE_NUMBER

    mean_distances_metres = numpy.nanmean(distance_matrix_metres, axis=1)
    good_indices = numpy.where(
        mean_distances_metres <= max_distance_metres)[0]

    print((
        'Linked warning to {0:d} storms. All distances (metres) printed '
        'below:\n{1:s}'
    ).format(len(good_indices), str(mean_distances_metres)))

    return [unique_sec_id_strings[k] for k in good_indices]
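# Worked example of the lifetime-fraction filter (added for clarity; numbers
# are made up).  Suppose a warning spans 3 sampled minutes and one storm's
# centroid distances are [500, nan, 700] metres: lifetime fraction
# = 1 - mean(isnan) = 1 - 1/3 ~= 0.667.  With min_lifetime_fraction = 0.5 the
# storm survives the filter, and its mean distance
# nanmean([500, nan, 700]) = 600 m is then compared against
# max_distance_metres to decide the linkage.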
def downsized_examples_to_eval_pairs(
        model_object, first_target_time_unix_sec, last_target_time_unix_sec,
        num_target_times_to_sample, num_examples_per_time,
        top_narr_directory_name, top_frontal_grid_dir_name,
        narr_predictor_names, pressure_level_mb, dilation_distance_metres,
        num_rows_in_half_grid, num_columns_in_half_grid, num_classes,
        predictor_time_step_offsets=None, num_lead_time_steps=None,
        isotonic_model_object_by_class=None, narr_mask_matrix=None):
    """Creates evaluation pairs from downsized 3-D or 4-D examples.

    M = number of pixel rows in full NARR grid
    N = number of pixel columns in full NARR grid
    m = number of pixel rows in each downsized grid
      = 2 * num_rows_in_half_grid + 1
    n = number of pixel columns in each downsized grid
      = 2 * num_columns_in_half_grid + 1
    P = number of evaluation pairs created by this method
    K = number of classes

    :param model_object: Instance of `keras.models.Model`. This will be
        applied to each downsized example, creating the prediction for said
        example.
    :param first_target_time_unix_sec: Target time. Downsized examples will
        be randomly chosen from the period `first_target_time_unix_sec`...
        `last_target_time_unix_sec`.
    :param last_target_time_unix_sec: See above.
    :param num_target_times_to_sample: Number of target times to sample (from
        the period `first_target_time_unix_sec`...
        `last_target_time_unix_sec`).
    :param num_examples_per_time: Number of downsized examples per target
        time. Downsized examples will be randomly drawn from each target
        time.
    :param top_narr_directory_name: Name of top-level directory with NARR
        data (one file for each variable, pressure level, and time step).
    :param top_frontal_grid_dir_name: Name of top-level directory with
        frontal grids (one file per time step).
    :param narr_predictor_names: 1-D list of NARR fields to use as predictors.
    :param pressure_level_mb: Pressure level (millibars).
    :param dilation_distance_metres: Dilation distance for both warm and cold
        fronts.
    :param num_rows_in_half_grid: See general discussion above.
    :param num_columns_in_half_grid: See general discussion above.
    :param num_classes: Number of classes.
    :param predictor_time_step_offsets: [needed only if examples are 4-D]
        length-T numpy array of offsets between predictor times and
        (target time - lead time).
    :param num_lead_time_steps: [needed only if examples are 4-D] Number of
        time steps between latest predictor time (last image in the sequence)
        and target time.
    :param isotonic_model_object_by_class: length-K list with trained
        instances of `sklearn.isotonic.IsotonicRegression`. If None, will
        omit isotonic regression.
    :param narr_mask_matrix: M-by-N numpy array of integers (0 or 1). If
        narr_mask_matrix[i, j] = 0, cell [i, j] in the full grid will never
        be used to create an evaluation pair -- i.e., will never be used as
        the center of a downsized grid. If `narr_mask_matrix is None`, any
        cell in the full grid can be used to create an evaluation pair.
    :return: class_probability_matrix: See documentation for
        `check_evaluation_pairs`.
    :return: observed_labels: See doc for `check_evaluation_pairs`.
    """

    error_checking.assert_is_integer(num_target_times_to_sample)
    error_checking.assert_is_greater(num_target_times_to_sample, 0)
    error_checking.assert_is_integer(num_examples_per_time)
    error_checking.assert_is_greater(num_examples_per_time, 0)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    if predictor_time_step_offsets is None:
        num_dimensions_per_example = 3
    else:
        num_dimensions_per_example = 4

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_target_time_unix_sec,
        end_time_unix_sec=last_target_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS, include_endpoint=True)

    numpy.random.shuffle(target_times_unix_sec)
    target_times_unix_sec = target_times_unix_sec[:num_target_times_to_sample]
    target_time_strings = [
        time_conversion.unix_sec_to_string(t, TIME_FORMAT_FOR_LOG_MESSAGES)
        for t in target_times_unix_sec
    ]

    class_probability_matrix = numpy.full(
        (num_target_times_to_sample, num_examples_per_time, num_classes),
        numpy.nan)
    observed_labels = numpy.full(
        (num_target_times_to_sample, num_examples_per_time), -1, dtype=int)

    for i in range(num_target_times_to_sample):
        print('Drawing evaluation pairs from {0:s}...'.format(
            target_time_strings[i]))

        (these_center_row_indices, these_center_column_indices
        ) = _get_random_sample_points(
            num_points=num_examples_per_time, for_downsized_examples=True,
            narr_mask_matrix=narr_mask_matrix)

        if num_dimensions_per_example == 3:
            (this_downsized_predictor_matrix, observed_labels[i, :], _, _
            ) = testing_io.create_downsized_3d_examples(
                center_row_indices=these_center_row_indices,
                center_column_indices=these_center_column_indices,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                target_time_unix_sec=target_times_unix_sec[i],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                num_classes=num_classes)
        else:
            (this_downsized_predictor_matrix, observed_labels[i, :], _, _
            ) = testing_io.create_downsized_4d_examples(
                center_row_indices=these_center_row_indices,
                center_column_indices=these_center_column_indices,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                target_time_unix_sec=target_times_unix_sec[i],
                predictor_time_step_offsets=predictor_time_step_offsets,
                num_lead_time_steps=num_lead_time_steps,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                num_classes=num_classes)

        class_probability_matrix[i, ...] = model_object.predict(
            this_downsized_predictor_matrix,
            batch_size=num_examples_per_time)

    new_dimensions = (
        num_target_times_to_sample * num_examples_per_time, num_classes
    )
    class_probability_matrix = numpy.reshape(
        class_probability_matrix, new_dimensions)
    observed_labels = numpy.reshape(observed_labels, observed_labels.size)

    if isotonic_model_object_by_class is not None:
        class_probability_matrix = (
            isotonic_regression.apply_model_for_each_class(
                orig_class_probability_matrix=class_probability_matrix,
                observed_labels=observed_labels,
                model_object_by_class=isotonic_model_object_by_class))

    return class_probability_matrix, observed_labels
def _find_io_files_for_renaming(top_input_dir_name, first_date_unix_sec,
                                last_date_unix_sec, top_output_dir_name):
    """Finds input and output files for renaming storms.

    N = number of dates

    :param top_input_dir_name: See documentation for `rename_storms`.
    :param first_date_unix_sec: Same.
    :param last_date_unix_sec: Same.
    :param top_output_dir_name: Same.
    :return: input_file_names_by_date: length-N list, where the [i]th item is
        a numpy array of paths to input files for the [i]th date.
    :return: output_file_names_by_date: Same as above, but for output files.
    :return: valid_times_by_date_unix_sec: Same as above, but for valid
        times. All 3 arrays for the [i]th date have the same length.
    """

    dates_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_date_unix_sec,
        end_time_unix_sec=last_date_unix_sec,
        time_interval_sec=DAYS_TO_SECONDS, include_endpoint=True)

    date_strings = [
        time_conversion.unix_sec_to_string(t, DATE_FORMAT)
        for t in dates_unix_sec
    ]

    num_dates = len(date_strings)
    input_file_names_by_date = [numpy.array([], dtype=object)] * num_dates
    output_file_names_by_date = [numpy.array([], dtype=object)] * num_dates
    valid_times_by_date_unix_sec = [numpy.array([], dtype=int)] * num_dates

    for i in range(num_dates):
        print('Finding input files for date {0:s}...'.format(
            date_strings[i]))

        (these_input_file_names, _
        ) = tracking_io.find_processed_files_one_spc_date(
            spc_date_string=date_strings[i],
            data_source=tracking_utils.PROBSEVERE_SOURCE_ID,
            top_processed_dir_name=top_input_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            raise_error_if_missing=True)

        these_input_file_names.sort()
        these_valid_times_unix_sec = numpy.array([
            tracking_io.processed_file_name_to_time(f)
            for f in these_input_file_names
        ], dtype=int)

        these_output_file_names = []
        for t in these_valid_times_unix_sec:
            these_output_file_names.append(tracking_io.find_processed_file(
                unix_time_sec=t,
                data_source=tracking_utils.PROBSEVERE_SOURCE_ID,
                top_processed_dir_name=top_output_dir_name,
                tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
                raise_error_if_missing=False))

        input_file_names_by_date[i] = numpy.array(
            these_input_file_names, dtype=object)
        output_file_names_by_date[i] = numpy.array(
            these_output_file_names, dtype=object)
        valid_times_by_date_unix_sec[i] = these_valid_times_unix_sec

    print(SEPARATOR_STRING)

    return (input_file_names_by_date, output_file_names_by_date,
            valid_times_by_date_unix_sec)
def _run(top_frontal_grid_dir_name, first_time_string, last_time_string,
         dilation_distance_metres, min_num_fronts, output_dir_name):
    """Creates mask, indicating where human forecasters usually draw fronts.

    This is effectively the main method.

    :param top_frontal_grid_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param dilation_distance_metres: Same.
    :param min_num_fronts: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)
    error_checking.assert_is_greater(min_num_fronts, 0)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS)

    num_times = len(valid_times_unix_sec)
    num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    num_cold_fronts_matrix = None
    num_warm_fronts_matrix = None

    for i in range(num_times):
        this_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_frontal_grid_dir_name,
            file_type=fronts_io.GRIDDED_FILE_TYPE,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        if not os.path.isfile(this_file_name):
            warning_string = (
                'POTENTIAL PROBLEM. Cannot find file: "{0:s}"'
            ).format(this_file_name)
            warnings.warn(warning_string)
            continue

        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_frontal_grid_table = fronts_io.read_narr_grids_from_file(
            this_file_name)

        this_frontal_grid_matrix = ml_utils.front_table_to_images(
            frontal_grid_table=this_frontal_grid_table,
            num_rows_per_image=num_grid_rows,
            num_columns_per_image=num_grid_columns)

        this_frontal_grid_matrix = ml_utils.dilate_ternary_target_images(
            target_matrix=this_frontal_grid_matrix,
            dilation_distance_metres=dilation_distance_metres, verbose=False)
        this_frontal_grid_matrix = this_frontal_grid_matrix[0, ...]

        this_num_cold_fronts_matrix = (
            this_frontal_grid_matrix == front_utils.COLD_FRONT_INTEGER_ID
        ).astype(int)
        this_num_warm_fronts_matrix = (
            this_frontal_grid_matrix == front_utils.WARM_FRONT_INTEGER_ID
        ).astype(int)

        if num_cold_fronts_matrix is None:
            num_cold_fronts_matrix = this_num_cold_fronts_matrix + 0
            num_warm_fronts_matrix = this_num_warm_fronts_matrix + 0
        else:
            num_cold_fronts_matrix = (
                num_cold_fronts_matrix + this_num_cold_fronts_matrix)
            num_warm_fronts_matrix = (
                num_warm_fronts_matrix + this_num_warm_fronts_matrix)

    print(SEPARATOR_STRING)
    print('Masking out grid cells with < {0:d} fronts...'.format(
        min_num_fronts))

    num_both_fronts_matrix = num_warm_fronts_matrix + num_cold_fronts_matrix
    mask_matrix = (num_both_fronts_matrix >= min_num_fronts).astype(int)

    pickle_file_name = '{0:s}/narr_mask.p'.format(output_dir_name)
    print('Writing mask to: "{0:s}"...'.format(pickle_file_name))
    ml_utils.write_narr_mask(
        mask_matrix=mask_matrix, pickle_file_name=pickle_file_name)

    warm_front_map_file_name = '{0:s}/num_warm_fronts.jpg'.format(
        output_dir_name)
    _plot_front_densities(
        num_fronts_matrix=num_warm_fronts_matrix,
        colour_map_object=WARM_FRONT_COLOUR_MAP_OBJECT,
        title_string='Number of warm fronts', annotation_string='(a)',
        output_file_name=warm_front_map_file_name, mask_matrix=None,
        add_colour_bar=True)

    cold_front_map_file_name = '{0:s}/num_cold_fronts.jpg'.format(
        output_dir_name)
    _plot_front_densities(
        num_fronts_matrix=num_cold_fronts_matrix,
        colour_map_object=COLD_FRONT_COLOUR_MAP_OBJECT,
        title_string='Number of cold fronts', annotation_string='(b)',
        output_file_name=cold_front_map_file_name, mask_matrix=None,
        add_colour_bar=True)

    both_fronts_title_string = (
        'Grid cells with at least {0:d} fronts'
    ).format(min_num_fronts)
    both_fronts_map_file_name = '{0:s}/num_both_fronts.jpg'.format(
        output_dir_name)

    num_both_fronts_matrix[num_both_fronts_matrix > 1] = 1
    _plot_front_densities(
        num_fronts_matrix=num_both_fronts_matrix,
        colour_map_object=BOTH_FRONTS_COLOUR_MAP_OBJECT,
        title_string=both_fronts_title_string, annotation_string='(c)',
        output_file_name=both_fronts_map_file_name, mask_matrix=mask_matrix,
        add_colour_bar=False)
def find_raw_azimuthal_shear_file(
        desired_time_unix_sec, spc_date_unix_sec, field_name, data_source,
        top_directory_name,
        max_time_offset_sec=DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC,
        raise_error_if_missing=False):
    """Finds raw azimuthal-shear file on local machine.

    If you know the exact time step for azimuthal shear, use find_raw_file.
    However, azimuthal shear is "special" and its times are often offset from
    those of other radar fields. This method accounts for that and finds
    az-shear files within some offset of the desired time.

    :param desired_time_unix_sec: Desired time for azimuthal shear.
    :param spc_date_unix_sec: SPC date.
    :param field_name: Field name in GewitterGefahr format (should match
        either `LOW_LEVEL_SHEAR_NAME` or `MID_LEVEL_SHEAR_NAME`).
    :param data_source: Data source (either "myrorss" or "mrms").
    :param top_directory_name: Name of top-level directory with raw MYRORSS
        files.
    :param max_time_offset_sec: Maximum offset (seconds) between desired time
        and file time.
    :param raise_error_if_missing: Boolean flag. If True and no az-shear file
        can be found within `max_time_offset_sec` of `desired_time_unix_sec`,
        will raise error. If False and no az-shear file can be found within
        `max_time_offset_sec` of `desired_time_unix_sec`, will return None.
    :return: raw_file_name: Path to raw az-shear file. If file is missing and
        raise_error_if_missing = False, this is the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_integer(desired_time_unix_sec)
    error_checking.assert_is_integer(max_time_offset_sec)
    error_checking.assert_is_greater(max_time_offset_sec, 0)
    error_checking.assert_is_boolean(raise_error_if_missing)

    first_allowed_minute_unix_sec = numpy.round(int(rounder.floor_to_nearest(
        float(desired_time_unix_sec - max_time_offset_sec),
        MINUTES_TO_SECONDS)))
    last_allowed_minute_unix_sec = numpy.round(int(rounder.floor_to_nearest(
        float(desired_time_unix_sec + max_time_offset_sec),
        MINUTES_TO_SECONDS)))

    allowed_minutes_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_allowed_minute_unix_sec,
        end_time_unix_sec=last_allowed_minute_unix_sec,
        time_interval_sec=MINUTES_TO_SECONDS, include_endpoint=True
    ).astype(int)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name, data_source=data_source)

    raw_file_names = []
    for this_time_unix_sec in allowed_minutes_unix_sec:
        this_pathless_file_pattern = _get_pathless_raw_file_pattern(
            this_time_unix_sec)
        this_file_pattern = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
            top_directory_name, spc_date_string, relative_directory_name,
            this_pathless_file_pattern)
        raw_file_names += glob.glob(this_file_pattern)

    file_times_unix_sec = []
    for this_raw_file_name in raw_file_names:
        _, this_pathless_file_name = os.path.split(this_raw_file_name)
        this_time_string, ending = os.path.splitext(this_pathless_file_name)
        if ending.rfind("gz") > -1:
            this_time_string, ending = os.path.splitext(this_time_string)

        file_times_unix_sec.append(time_conversion.string_to_unix_sec(
            this_time_string, TIME_FORMAT_SECONDS))

    if len(file_times_unix_sec):
        file_times_unix_sec = numpy.array(file_times_unix_sec)
        time_differences_sec = numpy.absolute(
            file_times_unix_sec - desired_time_unix_sec)
        nearest_index = numpy.argmin(time_differences_sec)
        min_time_diff_sec = time_differences_sec[nearest_index]
    else:
        min_time_diff_sec = numpy.inf

    if min_time_diff_sec > max_time_offset_sec:
        if raise_error_if_missing:
            desired_time_string = time_conversion.unix_sec_to_string(
                desired_time_unix_sec, TIME_FORMAT_FOR_LOG_MESSAGES)

            log_string = (
                'Could not find "{0:s}" file within {1:d} seconds of {2:s}'
            ).format(field_name, max_time_offset_sec, desired_time_string)
            raise ValueError(log_string)

        return None

    return raw_file_names[nearest_index]
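# Worked example of the time-offset search (added; numbers made up).  With
# desired_time_unix_sec = 1000000130 and max_time_offset_sec = 180, the
# globbed minutes run from floor(1000000130 - 180, 60) = 999999900 to
# floor(1000000130 + 180, 60) = 1000000260; among all files found in that
# window, the one whose parsed time is nearest the desired time wins, unless
# even the nearest is more than 180 s away.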
def _run(prediction_dir_name_by_model, model_weights, first_time_string,
         last_time_string, output_prediction_dir_name):
    """Ensembles predictions from two or more NFA models.

    This is effectively the main method.

    :param prediction_dir_name_by_model: See documentation at top of file.
    :param model_weights: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param output_prediction_dir_name: Same.
    """

    error_checking.assert_is_geq_numpy_array(model_weights, 0.)
    error_checking.assert_is_leq_numpy_array(model_weights, 1.)
    error_checking.assert_is_geq(numpy.sum(model_weights), 1. - TOLERANCE)
    error_checking.assert_is_leq(numpy.sum(model_weights), 1. + TOLERANCE)

    num_models = len(model_weights)
    error_checking.assert_is_geq(num_models, 2)

    these_expected_dim = numpy.array([num_models], dtype=int)
    error_checking.assert_is_numpy_array(
        numpy.array(prediction_dir_name_by_model),
        exact_dimensions=these_expected_dim)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    possible_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS, include_endpoint=True)

    narr_mask_matrix = None

    for this_time_unix_sec in possible_times_unix_sec:
        these_prediction_file_names = [''] * num_models

        for j in range(num_models):
            these_prediction_file_names[j] = nfa.find_prediction_file(
                directory_name=prediction_dir_name_by_model[j],
                first_valid_time_unix_sec=this_time_unix_sec,
                last_valid_time_unix_sec=this_time_unix_sec,
                ensembled=False, raise_error_if_missing=j > 0)

            if not os.path.isfile(these_prediction_file_names[j]):
                break

        if these_prediction_file_names[-1] == '':
            continue

        this_class_probability_matrix = None

        for j in range(num_models):
            print('Reading data from: "{0:s}"...'.format(
                these_prediction_file_names[j]))
            this_predicted_label_matrix, this_metadata_dict = (
                nfa.read_gridded_predictions(these_prediction_file_names[j]))

            if narr_mask_matrix is None:
                narr_mask_matrix = this_metadata_dict[nfa.NARR_MASK_KEY] + 0

            new_class_probability_matrix = to_categorical(
                y=this_predicted_label_matrix, num_classes=NUM_CLASSES
            ).astype(float)
            new_class_probability_matrix = (
                model_weights[j] * new_class_probability_matrix)

            if this_class_probability_matrix is None:
                this_class_probability_matrix = (
                    new_class_probability_matrix + 0.)
            else:
                this_class_probability_matrix = (
                    this_class_probability_matrix +
                    new_class_probability_matrix)

        this_output_file_name = nfa.find_prediction_file(
            directory_name=output_prediction_dir_name,
            first_valid_time_unix_sec=this_time_unix_sec,
            last_valid_time_unix_sec=this_time_unix_sec, ensembled=True,
            raise_error_if_missing=False)

        print('Writing ensembled predictions to: "{0:s}"...\n'.format(
            this_output_file_name))

        nfa.write_ensembled_predictions(
            pickle_file_name=this_output_file_name,
            class_probability_matrix=this_class_probability_matrix,
            valid_times_unix_sec=numpy.array(
                [this_time_unix_sec], dtype=int),
            narr_mask_matrix=narr_mask_matrix,
            prediction_dir_name_by_model=prediction_dir_name_by_model,
            model_weights=model_weights)
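# Why to_categorical here (added note): each NFA model contributes a one-hot
# encoding of its deterministic labels, scaled by its weight; summing over
# models therefore yields, at each grid cell, the weighted fraction of models
# voting for each class, which serves as the ensemble probability (the
# weights are checked above to sum to ~1).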
def _run(input_prediction_dir_name, first_time_string, last_time_string,
         num_times, binarization_threshold, min_object_area_metres2,
         min_endpoint_length_metres, top_front_line_dir_name,
         output_file_name):
    """Converts gridded CNN predictions to objects.

    This is effectively the main method.

    :param input_prediction_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param binarization_threshold: Same.
    :param min_object_area_metres2: Same.
    :param min_endpoint_length_metres: Same.
    :param top_front_line_dir_name: Same.
    :param output_file_name: Same.
    """

    grid_spacing_metres = nwp_model_utils.get_xy_grid_spacing(
        model_name=nwp_model_utils.NARR_MODEL_NAME)[0]
    num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    possible_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS, include_endpoint=True)

    numpy.random.shuffle(possible_times_unix_sec)

    unix_times_sec = []
    list_of_predicted_region_tables = []
    num_times_done = 0
    narr_mask_matrix = None

    for i in range(len(possible_times_unix_sec)):
        if num_times_done == num_times:
            break

        this_prediction_file_name = ml_utils.find_gridded_prediction_file(
            directory_name=input_prediction_dir_name,
            first_target_time_unix_sec=possible_times_unix_sec[i],
            last_target_time_unix_sec=possible_times_unix_sec[i],
            raise_error_if_missing=False)

        if not os.path.isfile(this_prediction_file_name):
            continue

        num_times_done += 1
        unix_times_sec.append(possible_times_unix_sec[i])

        print 'Reading data from: "{0:s}"...'.format(
            this_prediction_file_name)
        this_prediction_dict = ml_utils.read_gridded_predictions(
            this_prediction_file_name)

        class_probability_matrix = this_prediction_dict[
            ml_utils.PROBABILITY_MATRIX_KEY]

        if narr_mask_matrix is None:
            narr_mask_matrix = numpy.invert(
                numpy.isnan(class_probability_matrix[0, ..., 0])
            ).astype(int)

        # TODO(thunderhoser): This should be a separate method.

        # Where probabilities are NaN (masked grid cells), assign probability
        # of 1 to the no-front class and 0 to the other classes.
        class_probability_matrix[..., front_utils.NO_FRONT_INTEGER_ID][
            numpy.isnan(class_probability_matrix[
                ..., front_utils.NO_FRONT_INTEGER_ID])
        ] = 1.
        class_probability_matrix[numpy.isnan(class_probability_matrix)] = 0.

        print 'Determinizing probabilities...'
        this_predicted_label_matrix = object_eval.determinize_probabilities(
            class_probability_matrix=class_probability_matrix,
            binarization_threshold=binarization_threshold)

        print 'Converting image to frontal regions...'
        list_of_predicted_region_tables.append(
            object_eval.images_to_regions(
                predicted_label_matrix=this_predicted_label_matrix,
                image_times_unix_sec=possible_times_unix_sec[[i]]))

        print 'Throwing out frontal regions with area < {0:f} km^2...'.format(
            METRES2_TO_KM2 * min_object_area_metres2)
        list_of_predicted_region_tables[-1] = (
            object_eval.discard_regions_with_small_area(
                predicted_region_table=list_of_predicted_region_tables[-1],
                x_grid_spacing_metres=grid_spacing_metres,
                y_grid_spacing_metres=grid_spacing_metres,
                min_area_metres2=min_object_area_metres2))

        print 'Skeletonizing frontal regions...'
        list_of_predicted_region_tables[-1] = (
            object_eval.skeletonize_frontal_regions(
                predicted_region_table=list_of_predicted_region_tables[-1],
                num_grid_rows=num_grid_rows,
                num_grid_columns=num_grid_columns))

        list_of_predicted_region_tables[-1] = object_eval.find_main_skeletons(
            predicted_region_table=list_of_predicted_region_tables[-1],
            image_times_unix_sec=possible_times_unix_sec[[i]],
            num_grid_rows=num_grid_rows, num_grid_columns=num_grid_columns,
            x_grid_spacing_metres=grid_spacing_metres,
            y_grid_spacing_metres=grid_spacing_metres,
            min_endpoint_length_metres=min_endpoint_length_metres)

        if num_times_done != num_times:
            print '\n'

        if len(list_of_predicted_region_tables) == 1:
            continue

        list_of_predicted_region_tables[-1] = (
            list_of_predicted_region_tables[-1].align(
                list_of_predicted_region_tables[0], axis=1)[0])

    print SEPARATOR_STRING

    unix_times_sec = numpy.array(unix_times_sec, dtype=int)
    predicted_region_table = pandas.concat(
        list_of_predicted_region_tables, axis=0, ignore_index=True)
    predicted_region_table = object_eval.convert_regions_rowcol_to_narr_xy(
        predicted_region_table=predicted_region_table,
        are_predictions_from_fcn=False)

    actual_polyline_table = _read_actual_polylines(
        top_input_dir_name=top_front_line_dir_name,
        unix_times_sec=unix_times_sec, narr_mask_matrix=narr_mask_matrix)
    print SEPARATOR_STRING

    actual_polyline_table = object_eval.project_polylines_latlng_to_narr(
        actual_polyline_table)

    print 'Writing predicted and observed objects to: "{0:s}"...'.format(
        output_file_name)
    object_eval.write_predictions_and_obs(
        predicted_region_table=predicted_region_table,
        actual_polyline_table=actual_polyline_table,
        pickle_file_name=output_file_name)
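# For intuition, here is a guess at what determinization with a binarization
# threshold looks like (the real logic lives in
# object_eval.determinize_probabilities and may differ in detail): the
# no-front class wins only where its probability meets the threshold;
# elsewhere, the highest-probability front class wins.

import numpy


def _demo_determinize(class_probability_matrix, binarization_threshold):
    """Hypothetical sketch.  class_probability_matrix is M-x-N-x-K, with
    class 0 = no front; returns M-x-N matrix of integer labels.
    """

    # Best front class (indices 1..K-1) at each grid cell.
    predicted_label_matrix = 1 + numpy.argmax(
        class_probability_matrix[..., 1:], axis=-1)

    # Where the no-front probability meets the threshold, label = 0.
    no_front_flag_matrix = (
        class_probability_matrix[..., 0] >= binarization_threshold)
    predicted_label_matrix[no_front_flag_matrix] = 0

    return predicted_label_matrix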
def _run(top_input_dir_name, first_time_string, last_time_string,
         pressure_level_mb, top_output_dir_name):
    """Computes theta_w (wet-bulb potential temperature) for NARR data.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param pressure_level_mb: Same.
    :param top_output_dir_name: Same.
    """

    if pressure_level_mb <= 0:
        pressure_level_mb = None

    if pressure_level_mb is None:
        pressure_in_file_name_mb = DUMMY_PRESSURE_LEVEL_MB + 0
    else:
        pressure_in_file_name_mb = pressure_level_mb + 0

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS)

    num_times = len(valid_times_unix_sec)
    this_pressure_matrix_pascals = None

    for i in range(num_times):
        this_temperature_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_input_dir_name,
            field_name=processed_narr_io.TEMPERATURE_NAME,
            pressure_level_mb=pressure_in_file_name_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print 'Reading data from: "{0:s}"...'.format(
            this_temperature_file_name)
        this_temperature_matrix_kelvins = (
            processed_narr_io.read_fields_from_file(
                this_temperature_file_name)[0])

        this_humidity_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_input_dir_name,
            field_name=processed_narr_io.SPECIFIC_HUMIDITY_NAME,
            pressure_level_mb=pressure_in_file_name_mb,
            valid_time_unix_sec=valid_times_unix_sec[i])

        print 'Reading data from: "{0:s}"...'.format(this_humidity_file_name)
        this_humidity_matrix_kg_kg01 = (
            processed_narr_io.read_fields_from_file(
                this_humidity_file_name)[0]
        )

        # At the surface ("dummy" pressure level), pressure varies in space
        # and is read from file at each time step.  At an isobaric level, the
        # pressure matrix is constant and created only once.
        if pressure_level_mb is None:
            this_pressure_file_name = processed_narr_io.find_file_for_one_time(
                top_directory_name=top_input_dir_name,
                field_name=processed_narr_io.HEIGHT_NAME,
                pressure_level_mb=pressure_in_file_name_mb,
                valid_time_unix_sec=valid_times_unix_sec[i])

            print 'Reading data from: "{0:s}"...'.format(
                this_pressure_file_name)
            this_pressure_matrix_pascals = (
                processed_narr_io.read_fields_from_file(
                    this_pressure_file_name)[0])

            print this_pressure_matrix_pascals[:5, :5]  # debugging output
        else:
            if this_pressure_matrix_pascals is None:
                this_pressure_matrix_pascals = numpy.full(
                    this_humidity_matrix_kg_kg01.shape,
                    pressure_level_mb * MB_TO_PASCALS)

        this_dewpoint_matrix_kelvins = (
            moisture_conversions.specific_humidity_to_dewpoint(
                specific_humidities_kg_kg01=this_humidity_matrix_kg_kg01,
                total_pressures_pascals=this_pressure_matrix_pascals))

        this_wb_temp_matrix_kelvins = (
            ge_conversions.dewpoint_to_wet_bulb_temperature(
                dewpoints_kelvins=this_dewpoint_matrix_kelvins,
                temperatures_kelvins=this_temperature_matrix_kelvins,
                total_pressures_pascals=this_pressure_matrix_pascals))

        this_theta_w_matrix_kelvins = (
            temperature_conversions.temperatures_to_potential_temperatures(
                temperatures_kelvins=this_wb_temp_matrix_kelvins,
                total_pressures_pascals=this_pressure_matrix_pascals))

        this_theta_w_file_name = processed_narr_io.find_file_for_one_time(
            top_directory_name=top_output_dir_name,
            field_name=processed_narr_io.WET_BULB_THETA_NAME,
            pressure_level_mb=pressure_in_file_name_mb,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print 'Writing theta_w field to: "{0:s}"...\n'.format(
            this_theta_w_file_name)
        processed_narr_io.write_fields_to_file(
            pickle_file_name=this_theta_w_file_name,
            field_matrix=this_theta_w_matrix_kelvins,
            field_name=processed_narr_io.WET_BULB_THETA_NAME,
            pressure_level_pascals=pressure_in_file_name_mb * MB_TO_PASCALS,
            valid_times_unix_sec=valid_times_unix_sec[[i]])
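# The thermodynamic chain above is: specific humidity -> dewpoint -> wet-bulb
# temperature -> potential temperature of the wet-bulb temperature (theta_w).
# A usage sketch on a tiny synthetic grid, assuming the same conversion
# modules imported by this script (values are arbitrary but physically
# plausible for 850 mb):

import numpy


def _demo_theta_w_chain():
    """Computes theta_w on a 2-x-2 grid at 850 mb (synthetic values)."""

    temperature_matrix_kelvins = numpy.full((2, 2), 280.)
    humidity_matrix_kg_kg01 = numpy.full((2, 2), 0.005)
    pressure_matrix_pascals = numpy.full((2, 2), 85000.)

    dewpoint_matrix_kelvins = (
        moisture_conversions.specific_humidity_to_dewpoint(
            specific_humidities_kg_kg01=humidity_matrix_kg_kg01,
            total_pressures_pascals=pressure_matrix_pascals))

    wb_temp_matrix_kelvins = (
        ge_conversions.dewpoint_to_wet_bulb_temperature(
            dewpoints_kelvins=dewpoint_matrix_kelvins,
            temperatures_kelvins=temperature_matrix_kelvins,
            total_pressures_pascals=pressure_matrix_pascals))

    return temperature_conversions.temperatures_to_potential_temperatures(
        temperatures_kelvins=wb_temp_matrix_kelvins,
        total_pressures_pascals=pressure_matrix_pascals)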
def full_size_examples_to_eval_pairs(
        model_object, first_target_time_unix_sec, last_target_time_unix_sec,
        num_target_times_to_sample, num_points_per_time,
        top_narr_directory_name, top_frontal_grid_dir_name,
        narr_predictor_names, pressure_level_mb, dilation_distance_metres,
        num_classes, predictor_time_step_offsets=None,
        num_lead_time_steps=None, isotonic_model_object_by_class=None):
    """Creates evaluation pairs from full-size 3-D or 4-D examples.

    P = number of evaluation pairs created by this method
    K = number of classes

    :param model_object: See documentation for
        `downsized_examples_to_eval_pairs`.
    :param first_target_time_unix_sec: Same.
    :param last_target_time_unix_sec: Same.
    :param num_target_times_to_sample: Same.
    :param num_points_per_time: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param narr_predictor_names: Same.
    :param pressure_level_mb: Same.
    :param dilation_distance_metres: Same.
    :param num_classes: Same.
    :param predictor_time_step_offsets: Same.
    :param num_lead_time_steps: Same.
    :param isotonic_model_object_by_class: Same.
    :return: class_probability_matrix: Same.
    :return: observed_labels: Same.
    """

    error_checking.assert_is_integer(num_target_times_to_sample)
    error_checking.assert_is_greater(num_target_times_to_sample, 0)
    error_checking.assert_is_integer(num_points_per_time)
    error_checking.assert_is_greater(num_points_per_time, 0)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    if predictor_time_step_offsets is None:
        num_dimensions_per_example = 3
    else:
        num_dimensions_per_example = 4

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_target_time_unix_sec,
        end_time_unix_sec=last_target_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SECONDS, include_endpoint=True)

    numpy.random.shuffle(target_times_unix_sec)
    target_times_unix_sec = target_times_unix_sec[:num_target_times_to_sample]
    target_time_strings = [
        time_conversion.unix_sec_to_string(t, TIME_FORMAT_FOR_LOG_MESSAGES)
        for t in target_times_unix_sec]

    class_probability_matrix = numpy.full(
        (num_target_times_to_sample, num_points_per_time, num_classes),
        numpy.nan)
    observed_labels = numpy.full(
        (num_target_times_to_sample, num_points_per_time), -1, dtype=int)

    for i in range(num_target_times_to_sample):
        print 'Drawing evaluation pairs from {0:s}...'.format(
            target_time_strings[i])

        if num_dimensions_per_example == 3:
            this_class_probability_matrix, this_actual_target_matrix = (
                fcn.apply_model_to_3d_example(
                    model_object=model_object,
                    target_time_unix_sec=target_times_unix_sec[i],
                    top_narr_directory_name=top_narr_directory_name,
                    top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                    narr_predictor_names=narr_predictor_names,
                    pressure_level_mb=pressure_level_mb,
                    dilation_distance_metres=dilation_distance_metres,
                    num_classes=num_classes,
                    isotonic_model_object_by_class=
                    isotonic_model_object_by_class))
        else:
            this_class_probability_matrix, this_actual_target_matrix = (
                fcn.apply_model_to_4d_example(
                    model_object=model_object,
                    target_time_unix_sec=target_times_unix_sec[i],
                    num_predictor_time_steps=predictor_time_step_offsets,
                    num_lead_time_steps=num_lead_time_steps,
                    top_narr_directory_name=top_narr_directory_name,
                    top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                    narr_predictor_names=narr_predictor_names,
                    pressure_level_mb=pressure_level_mb,
                    dilation_distance_metres=dilation_distance_metres,
                    num_classes=num_classes,
                    isotonic_model_object_by_class=
                    isotonic_model_object_by_class))

        these_row_indices, these_column_indices = _get_random_sample_points(
            num_points=num_points_per_time, for_downsized_examples=False)

        class_probability_matrix[i, ...] = this_class_probability_matrix[
            0, these_row_indices, these_column_indices, ...]

        this_actual_target_matrix = this_actual_target_matrix[
            0, these_row_indices, these_column_indices]
        observed_labels[i, :] = numpy.reshape(
            this_actual_target_matrix, this_actual_target_matrix.size)

    new_dimensions = (
        num_target_times_to_sample * num_points_per_time, num_classes)
    class_probability_matrix = numpy.reshape(
        class_probability_matrix, new_dimensions)
    observed_labels = numpy.reshape(observed_labels, observed_labels.size)

    return class_probability_matrix, observed_labels
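# The final reshape flattens the (time, point) axes into a single sample
# axis, so that row p of the output corresponds to one (grid point, time)
# evaluation pair.  Toy illustration with hypothetical sizes:

import numpy


def _demo_eval_pair_reshape():
    """Shows the (T, P, K) -> (T * P, K) flattening used above."""

    num_times, num_points, num_classes = 2, 3, 4

    class_probability_matrix = numpy.random.random_sample(
        (num_times, num_points, num_classes))
    observed_labels = numpy.random.randint(
        0, num_classes, size=(num_times, num_points))

    class_probability_matrix = numpy.reshape(
        class_probability_matrix, (num_times * num_points, num_classes))
    observed_labels = numpy.reshape(observed_labels, observed_labels.size)

    assert class_probability_matrix.shape == (6, 4)
    assert observed_labels.shape == (6,)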
def _process_wpc_bulletins(first_time_string, last_time_string,
                           top_bulletin_dir_name, top_polyline_dir_name,
                           top_frontal_grid_dir_name):
    """Turns warm/cold fronts from WPC bulletins into polylines and NARR grids.

    :param first_time_string: Time (format "yyyymmddHH").  This script turns
        warm/cold fronts into polylines and NARR grids for all 3-hour time
        steps from `first_time_string`...`last_time_string`.
    :param last_time_string: See above.
    :param top_bulletin_dir_name: [input] Name of top-level directory with WPC
        bulletins.
    :param top_polyline_dir_name: [output] Name of top-level directory for
        Pickle files with frontal polylines.
    :param top_frontal_grid_dir_name: [output] Name of top-level directory for
        Pickle files with frontal grids.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SECONDS, include_endpoint=True)

    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        this_bulletin_file_name = wpc_bulletin_io.find_file(
            valid_time_unix_sec=valid_times_unix_sec[i],
            top_directory_name=top_bulletin_dir_name,
            raise_error_if_missing=False)

        if not os.path.isfile(this_bulletin_file_name):
            warning_string = 'Cannot find file. Expected at: "{0:s}"'.format(
                this_bulletin_file_name)
            warnings.warn(warning_string)
            continue

        print 'Reading data from: "{0:s}"...'.format(this_bulletin_file_name)
        this_polyline_table = wpc_bulletin_io.read_fronts_from_file(
            this_bulletin_file_name)

        this_polyline_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_polyline_dir_name,
            file_type=fronts_io.POLYLINE_FILE_TYPE,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print 'Writing polylines to file: "{0:s}"...'.format(
            this_polyline_file_name)
        fronts_io.write_polylines_to_file(
            front_table=this_polyline_table,
            pickle_file_name=this_polyline_file_name)

        print 'Converting polylines to NARR grids...'
        this_frontal_grid_table = front_utils.many_polylines_to_narr_grid(
            polyline_table=this_polyline_table,
            dilation_distance_metres=DILATION_DISTANCE_METRES)

        this_gridded_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_frontal_grid_dir_name,
            file_type=fronts_io.GRIDDED_FILE_TYPE,
            valid_time_unix_sec=valid_times_unix_sec[i],
            raise_error_if_missing=False)

        print 'Writing NARR grids to file: "{0:s}"...\n'.format(
            this_gridded_file_name)
        fronts_io.write_narr_grids_to_file(
            frontal_grid_table=this_frontal_grid_table,
            pickle_file_name=this_gridded_file_name)
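# The recurring "first/last time string -> list of 3-hourly Unix times"
# pattern can be reproduced with plain standard-library arithmetic.  Sketch
# only; the real helper, time_periods.range_and_interval_to_list, returns a
# numpy array and has more options:

import calendar
import time


def _demo_three_hourly_times(first_time_string, last_time_string):
    """Time strings are in format yyyymmddHH; returns list of Unix times,
    endpoint included (matching include_endpoint=True).
    """

    first_unix_sec = calendar.timegm(
        time.strptime(first_time_string, '%Y%m%d%H'))
    last_unix_sec = calendar.timegm(
        time.strptime(last_time_string, '%Y%m%d%H'))

    return list(range(first_unix_sec, last_unix_sec + 1, 3 * 3600))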
def _run(top_tracking_dir_name, first_spc_date_string, last_spc_date_string,
         colour_map_name, min_plot_latitude_deg, max_plot_latitude_deg,
         min_plot_longitude_deg, max_plot_longitude_deg, output_file_name):
    """Plots storm tracks for a continuous time period.

    This is effectively the main method.

    :param top_tracking_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param colour_map_name: Same.
    :param min_plot_latitude_deg: Same.
    :param max_plot_latitude_deg: Same.
    :param min_plot_longitude_deg: Same.
    :param max_plot_longitude_deg: Same.
    :param output_file_name: Same.
    """

    if colour_map_name in ['', 'None']:
        colour_map_object = 'random'
    else:
        colour_map_object = pyplot.cm.get_cmap(colour_map_name)

    if min_plot_latitude_deg <= SENTINEL_VALUE:
        min_plot_latitude_deg = None
    if max_plot_latitude_deg <= SENTINEL_VALUE:
        max_plot_latitude_deg = None
    if min_plot_longitude_deg <= SENTINEL_VALUE:
        min_plot_longitude_deg = None
    if max_plot_longitude_deg <= SENTINEL_VALUE:
        max_plot_longitude_deg = None

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    list_of_storm_object_tables = []

    for this_spc_date_string in spc_date_strings:
        these_file_names = tracking_io.find_files_one_spc_date(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=
            echo_top_tracking.DUMMY_TRACKING_SCALE_METRES2,
            source_name=tracking_utils.SEGMOTION_NAME,
            spc_date_string=this_spc_date_string,
            raise_error_if_missing=False
        )[0]

        if len(these_file_names) == 0:
            continue

        this_storm_object_table = tracking_io.read_many_files(
            these_file_names
        )[REQUIRED_COLUMNS]
        list_of_storm_object_tables.append(this_storm_object_table)

        if this_spc_date_string != spc_date_strings[-1]:
            print(MINOR_SEPARATOR_STRING)

        if len(list_of_storm_object_tables) == 1:
            continue

        list_of_storm_object_tables[-1] = list_of_storm_object_tables[
            -1].align(list_of_storm_object_tables[0], axis=1)[0]

    print(SEPARATOR_STRING)

    storm_object_table = pandas.concat(
        list_of_storm_object_tables, axis=0, ignore_index=True)

    # TODO(thunderhoser): HACK
    first_time_unix_sec = time_conversion.string_to_unix_sec(
        '2011-04-27-20', '%Y-%m-%d-%H')
    storm_object_table = storm_object_table.loc[
        storm_object_table[tracking_utils.VALID_TIME_COLUMN] >=
        first_time_unix_sec
    ]

    if min_plot_latitude_deg is None:
        min_plot_latitude_deg = numpy.min(
            storm_object_table[tracking_utils.CENTROID_LATITUDE_COLUMN].values
        ) - LATLNG_BUFFER_DEG

    if max_plot_latitude_deg is None:
        max_plot_latitude_deg = numpy.max(
            storm_object_table[tracking_utils.CENTROID_LATITUDE_COLUMN].values
        ) + LATLNG_BUFFER_DEG

    if min_plot_longitude_deg is None:
        min_plot_longitude_deg = numpy.min(
            storm_object_table[tracking_utils.CENTROID_LONGITUDE_COLUMN].values
        ) - LATLNG_BUFFER_DEG

    if max_plot_longitude_deg is None:
        max_plot_longitude_deg = numpy.max(
            storm_object_table[tracking_utils.CENTROID_LONGITUDE_COLUMN].values
        ) + LATLNG_BUFFER_DEG

    _, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=min_plot_latitude_deg,
            max_latitude_deg=max_plot_latitude_deg,
            min_longitude_deg=min_plot_longitude_deg,
            max_longitude_deg=max_plot_longitude_deg, resolution_string='i'))

    # plotting_utils.plot_coastlines(
    #     basemap_object=basemap_object, axes_object=axes_object,
    #     line_colour=BORDER_COLOUR
    # )

    plotting_utils.plot_countries(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)

    plotting_utils.plot_states_and_provinces(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)

    plotting_utils.plot_parallels(
        basemap_object=basemap_object, axes_object=axes_object,
        num_parallels=NUM_PARALLELS, line_colour=numpy.full(3, 1.))

    plotting_utils.plot_meridians(
        basemap_object=basemap_object, axes_object=axes_object,
        num_meridians=NUM_MERIDIANS, line_colour=numpy.full(3, 1.))

    colour_bar_object = storm_plotting.plot_storm_tracks(
        storm_object_table=storm_object_table, axes_object=axes_object,
        basemap_object=basemap_object, colour_map_object=colour_map_object)

    valid_times_unix_sec = (
        storm_object_table[tracking_utils.VALID_TIME_COLUMN].values)

    # TODO(thunderhoser): HACK
    tick_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=numpy.min(valid_times_unix_sec),
        end_time_unix_sec=numpy.max(valid_times_unix_sec),
        time_interval_sec=1800, include_endpoint=True)

    tick_time_strings = [
        time_conversion.unix_sec_to_string(t, COLOUR_BAR_TIME_FORMAT)
        for t in tick_times_unix_sec
    ]

    colour_bar_object.set_ticks(tick_times_unix_sec)
    colour_bar_object.set_ticklabels(tick_time_strings)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    pyplot.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight')
    pyplot.close()
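# The colour-bar ticks above are raw Unix times (every 1800 s), relabelled as
# formatted strings.  Standard-library sketch of the same relabelling (the
# 'HHMM UTC' format is an assumption, not necessarily COLOUR_BAR_TIME_FORMAT):

import time


def _demo_tick_labels(tick_times_unix_sec):
    """Converts Unix times to human-readable colour-bar labels."""

    return [time.strftime('%H%M UTC', time.gmtime(t))
            for t in tick_times_unix_sec]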
def _run(top_gridrad_dir_name, first_spc_date_string, last_spc_date_string,
         colour_map_name, grid_spacing_metres, output_file_name):
    """Plots GridRad domains.

    This is effectively the main method.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param colour_map_name: Same.
    :param grid_spacing_metres: Same.
    :param output_file_name: Same.
    """

    colour_map_object = pyplot.get_cmap(colour_map_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True)

    valid_spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in valid_times_unix_sec
    ]

    domain_min_latitudes_deg = []
    domain_max_latitudes_deg = []
    domain_min_longitudes_deg = []
    domain_max_longitudes_deg = []

    prev_domain_limits_deg = numpy.full(4, numpy.nan)
    prev_spc_date_string = 'foo'
    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        this_gridrad_file_name = gridrad_io.find_file(
            unix_time_sec=valid_times_unix_sec[i],
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False)

        if not os.path.isfile(this_gridrad_file_name):
            continue

        these_domain_limits_deg = _get_domain_one_file(this_gridrad_file_name)
        same_domain = (
            valid_spc_date_strings[i] == prev_spc_date_string and
            numpy.allclose(
                these_domain_limits_deg, prev_domain_limits_deg, TOLERANCE)
        )

        if same_domain:
            continue

        prev_domain_limits_deg = these_domain_limits_deg + 0.
        prev_spc_date_string = valid_spc_date_strings[i]

        domain_min_latitudes_deg.append(these_domain_limits_deg[0])
        domain_max_latitudes_deg.append(these_domain_limits_deg[1])
        domain_min_longitudes_deg.append(these_domain_limits_deg[2])
        domain_max_longitudes_deg.append(these_domain_limits_deg[3])

    print(SEPARATOR_STRING)

    domain_min_latitudes_deg = numpy.array(domain_min_latitudes_deg)
    domain_max_latitudes_deg = numpy.array(domain_max_latitudes_deg)
    domain_min_longitudes_deg = numpy.array(domain_min_longitudes_deg)
    domain_max_longitudes_deg = numpy.array(domain_max_longitudes_deg)
    num_domains = len(domain_min_latitudes_deg)

    grid_metadata_dict = grids.create_equidistant_grid(
        min_latitude_deg=OVERALL_MIN_LATITUDE_DEG,
        max_latitude_deg=OVERALL_MAX_LATITUDE_DEG,
        min_longitude_deg=OVERALL_MIN_LONGITUDE_DEG,
        max_longitude_deg=OVERALL_MAX_LONGITUDE_DEG,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres, azimuthal=False)

    unique_x_coords_metres = grid_metadata_dict[grids.X_COORDS_KEY]
    unique_y_coords_metres = grid_metadata_dict[grids.Y_COORDS_KEY]
    projection_object = grid_metadata_dict[grids.PROJECTION_KEY]

    x_coord_matrix_metres, y_coord_matrix_metres = (
        grids.xy_vectors_to_matrices(
            x_unique_metres=unique_x_coords_metres,
            y_unique_metres=unique_y_coords_metres))

    latitude_matrix_deg, longitude_matrix_deg = (
        projections.project_xy_to_latlng(
            x_coords_metres=x_coord_matrix_metres,
            y_coords_metres=y_coord_matrix_metres,
            projection_object=projection_object))

    num_grid_rows = latitude_matrix_deg.shape[0]
    num_grid_columns = latitude_matrix_deg.shape[1]
    num_days_matrix = numpy.full((num_grid_rows, num_grid_columns), 0)

    for i in range(num_domains):
        if numpy.mod(i, 10) == 0:
            print('Have found grid points in {0:d} of {1:d} domains...'.format(
                i, num_domains))

        this_lat_flag_matrix = numpy.logical_and(
            latitude_matrix_deg >= domain_min_latitudes_deg[i],
            latitude_matrix_deg <= domain_max_latitudes_deg[i])
        this_lng_flag_matrix = numpy.logical_and(
            longitude_matrix_deg >= domain_min_longitudes_deg[i],
            longitude_matrix_deg <= domain_max_longitudes_deg[i])

        num_days_matrix += numpy.logical_and(
            this_lat_flag_matrix, this_lng_flag_matrix
        ).astype(int)

    print(SEPARATOR_STRING)

    figure_object, axes_object = _plot_data(
        num_days_matrix=num_days_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=colour_map_object)

    plotting_utils.label_axes(axes_object=axes_object, label_string='(c)')

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight')
    pyplot.close(figure_object)
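# The day-count grid above is just "how many (lat, lng) bounding boxes cover
# each grid point".  Self-contained version of the counting loop with toy
# boxes (the meshgrid stands in for the projected NARR-area grid):

import numpy


def _demo_domain_counts():
    """Counts covering bounding boxes at each point of a 6-x-8 toy grid."""

    latitude_matrix_deg, longitude_matrix_deg = numpy.meshgrid(
        numpy.linspace(25., 50., 6), numpy.linspace(230., 300., 8),
        indexing='ij')

    # Each row: [min latitude, max latitude, min longitude, max longitude].
    domain_limit_matrix_deg = numpy.array([
        [30., 40., 240., 260.],
        [35., 45., 250., 290.]
    ])

    num_days_matrix = numpy.zeros(latitude_matrix_deg.shape, dtype=int)

    for this_row in domain_limit_matrix_deg:
        in_lat_band = numpy.logical_and(
            latitude_matrix_deg >= this_row[0],
            latitude_matrix_deg <= this_row[1])
        in_lng_band = numpy.logical_and(
            longitude_matrix_deg >= this_row[2],
            longitude_matrix_deg <= this_row[3])

        num_days_matrix += numpy.logical_and(
            in_lat_band, in_lng_band).astype(int)

    return num_days_matrix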
def _run(top_narr_dir_name, top_front_line_dir_name,
         top_wpc_bulletin_dir_name, first_time_string, last_time_string,
         pressure_level_mb, thermal_field_name, thermal_colour_map_name,
         max_thermal_prctile_for_colours, first_letter_label, letter_interval,
         output_dir_name):
    """Plots predictors on full NARR grid.

    This is effectively the main method.

    :param top_narr_dir_name: See documentation at top of file.
    :param top_front_line_dir_name: Same.
    :param top_wpc_bulletin_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param pressure_level_mb: Same.
    :param thermal_field_name: Same.
    :param thermal_colour_map_name: Same.
    :param max_thermal_prctile_for_colours: Same.
    :param first_letter_label: Same.
    :param letter_interval: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if
        `thermal_field_name not in VALID_THERMAL_FIELD_NAMES`.
    """

    # Check input args.
    if top_wpc_bulletin_dir_name in ['', 'None']:
        top_wpc_bulletin_dir_name = None
    if first_letter_label in ['', 'None']:
        first_letter_label = None

    if thermal_field_name not in VALID_THERMAL_FIELD_NAMES:
        error_string = (
            '\n{0:s}\nValid thermal fields (listed above) do not include '
            '"{1:s}".'
        ).format(str(VALID_THERMAL_FIELD_NAMES), thermal_field_name)

        raise ValueError(error_string)

    thermal_colour_map_object = pyplot.cm.get_cmap(thermal_colour_map_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, DEFAULT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, DEFAULT_TIME_FORMAT)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

    # Read metadata for NARR grid.
    narr_latitude_matrix_deg, narr_longitude_matrix_deg = (
        nwp_model_utils.get_latlng_grid_point_matrices(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    narr_rotation_cos_matrix, narr_rotation_sin_matrix = (
        nwp_model_utils.get_wind_rotation_angles(
            latitudes_deg=narr_latitude_matrix_deg,
            longitudes_deg=narr_longitude_matrix_deg,
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    narr_row_limits, narr_column_limits = (
        nwp_plotting.latlng_limits_to_rowcol_limits(
            min_latitude_deg=MIN_LATITUDE_DEG,
            max_latitude_deg=MAX_LATITUDE_DEG,
            min_longitude_deg=MIN_LONGITUDE_DEG,
            max_longitude_deg=MAX_LONGITUDE_DEG,
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    narr_rotation_cos_matrix = narr_rotation_cos_matrix[
        narr_row_limits[0]:(narr_row_limits[1] + 1),
        narr_column_limits[0]:(narr_column_limits[1] + 1)
    ]
    narr_rotation_sin_matrix = narr_rotation_sin_matrix[
        narr_row_limits[0]:(narr_row_limits[1] + 1),
        narr_column_limits[0]:(narr_column_limits[1] + 1)
    ]

    # Do plotting.
    narr_field_names = [
        processed_narr_io.U_WIND_GRID_RELATIVE_NAME,
        processed_narr_io.V_WIND_GRID_RELATIVE_NAME,
        thermal_field_name
    ]

    this_letter_label = None

    for this_time_unix_sec in valid_times_unix_sec:
        this_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_front_line_dir_name,
            file_type=fronts_io.POLYLINE_FILE_TYPE,
            valid_time_unix_sec=this_time_unix_sec)

        print 'Reading data from: "{0:s}"...'.format(this_file_name)
        this_polyline_table = fronts_io.read_polylines_from_file(
            this_file_name)

        if top_wpc_bulletin_dir_name is None:
            this_high_low_table = None
        else:
            this_file_name = wpc_bulletin_io.find_file(
                top_directory_name=top_wpc_bulletin_dir_name,
                valid_time_unix_sec=this_time_unix_sec)

            print 'Reading data from: "{0:s}"...'.format(this_file_name)
            this_high_low_table = wpc_bulletin_io.read_highs_and_lows(
                this_file_name)

        this_predictor_matrix = None

        for this_field_name in narr_field_names:
            this_file_name = processed_narr_io.find_file_for_one_time(
                top_directory_name=top_narr_dir_name,
                field_name=this_field_name,
                pressure_level_mb=pressure_level_mb,
                valid_time_unix_sec=this_time_unix_sec)

            print 'Reading data from: "{0:s}"...'.format(this_file_name)
            this_field_matrix = processed_narr_io.read_fields_from_file(
                this_file_name
            )[0][0, ...]

            this_field_matrix = utils.fill_nans(this_field_matrix)
            this_field_matrix = this_field_matrix[
                narr_row_limits[0]:(narr_row_limits[1] + 1),
                narr_column_limits[0]:(narr_column_limits[1] + 1)
            ]

            if this_field_name in [processed_narr_io.TEMPERATURE_NAME,
                                   processed_narr_io.WET_BULB_THETA_NAME]:
                this_field_matrix -= ZERO_CELSIUS_IN_KELVINS

            if this_field_name == processed_narr_io.SPECIFIC_HUMIDITY_NAME:
                this_field_matrix = this_field_matrix * KG_TO_GRAMS

            this_field_matrix = numpy.expand_dims(this_field_matrix, axis=-1)

            if this_predictor_matrix is None:
                this_predictor_matrix = this_field_matrix + 0.
            else:
                this_predictor_matrix = numpy.concatenate(
                    (this_predictor_matrix, this_field_matrix), axis=-1)

        u_wind_index = narr_field_names.index(
            processed_narr_io.U_WIND_GRID_RELATIVE_NAME)
        v_wind_index = narr_field_names.index(
            processed_narr_io.V_WIND_GRID_RELATIVE_NAME)

        (this_predictor_matrix[..., u_wind_index],
         this_predictor_matrix[..., v_wind_index]
        ) = nwp_model_utils.rotate_winds_to_earth_relative(
            u_winds_grid_relative_m_s01=this_predictor_matrix[
                ..., u_wind_index],
            v_winds_grid_relative_m_s01=this_predictor_matrix[
                ..., v_wind_index],
            rotation_angle_cosines=narr_rotation_cos_matrix,
            rotation_angle_sines=narr_rotation_sin_matrix)

        this_title_string = time_conversion.unix_sec_to_string(
            this_time_unix_sec, NICE_TIME_FORMAT)

        if pressure_level_mb == 1013:
            this_title_string += ' at surface'
        else:
            this_title_string += ' at {0:d} mb'.format(pressure_level_mb)

        this_default_time_string = time_conversion.unix_sec_to_string(
            this_time_unix_sec, DEFAULT_TIME_FORMAT)
        this_output_file_name = '{0:s}/predictors_{1:s}.jpg'.format(
            output_dir_name, this_default_time_string)

        if first_letter_label is not None:
            if this_letter_label is None:
                this_letter_label = first_letter_label
            else:
                this_letter_label = chr(
                    ord(this_letter_label) + letter_interval
                )

        _plot_one_time(
            predictor_matrix=this_predictor_matrix,
            predictor_names=narr_field_names,
            front_polyline_table=this_polyline_table,
            high_low_table=this_high_low_table,
            thermal_colour_map_object=thermal_colour_map_object,
            max_thermal_prctile_for_colours=max_thermal_prctile_for_colours,
            narr_row_limits=narr_row_limits,
            narr_column_limits=narr_column_limits,
            title_string=this_title_string, letter_label=this_letter_label,
            output_file_name=this_output_file_name)

        print '\n'
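# Rotating grid-relative winds to earth-relative winds is a pointwise 2-D
# rotation by the local grid-rotation angle.  A generic sketch for intuition
# only; sign conventions vary by model and projection, so this is not
# necessarily the exact transform inside
# nwp_model_utils.rotate_winds_to_earth_relative:

def _demo_rotate_winds(u_grid_m_s01, v_grid_m_s01, cos_matrix, sin_matrix):
    """Applies a standard 2-D rotation at each grid cell (numpy arrays,
    element-wise)."""

    u_earth_m_s01 = cos_matrix * u_grid_m_s01 + sin_matrix * v_grid_m_s01
    v_earth_m_s01 = -sin_matrix * u_grid_m_s01 + cos_matrix * v_grid_m_s01
    return u_earth_m_s01, v_earth_m_s01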
def find_raw_file_inexact_time(
        desired_time_unix_sec, spc_date_string, field_name, data_source,
        top_directory_name, height_m_asl=None, max_time_offset_sec=None,
        raise_error_if_missing=False):
    """Finds raw file at inexact time.

    If you know the exact valid time, use `find_raw_file`.

    :param desired_time_unix_sec: Desired valid time.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param field_name: Field name in GewitterGefahr format.
    :param data_source: Data source (string).
    :param top_directory_name: Name of top-level directory with raw files.
    :param height_m_asl: Radar height (metres above sea level).
    :param max_time_offset_sec: Maximum offset between actual and desired
        valid time.  For example, if `desired_time_unix_sec` is 162933 UTC
        5 Jan 2018 and `max_time_offset_sec` = 60, this method will look for
        az-shear at valid times from 162833...163033 UTC 5 Jan 2018.  If None,
        this defaults to `DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC` for
        azimuthal-shear fields and `DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC`
        for all other fields.
    :param raise_error_if_missing: Boolean flag.  If no file is found and
        raise_error_if_missing = True, this method will error out.  If no file
        is found and raise_error_if_missing = False, will return None.
    :return: raw_file_name: Path to raw file.
    :raises: ValueError: if no file is found and
        raise_error_if_missing = True.
    """

    # Error-checking.
    error_checking.assert_is_integer(desired_time_unix_sec)
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_is_boolean(raise_error_if_missing)
    radar_utils.check_field_name(field_name)

    if max_time_offset_sec is None:
        if field_name in AZIMUTHAL_SHEAR_FIELD_NAMES:
            max_time_offset_sec = DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC
        else:
            max_time_offset_sec = DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC

    error_checking.assert_is_integer(max_time_offset_sec)
    error_checking.assert_is_greater(max_time_offset_sec, 0)

    first_allowed_minute_unix_sec = int(rounder.floor_to_nearest(
        float(desired_time_unix_sec - max_time_offset_sec),
        MINUTES_TO_SECONDS))
    last_allowed_minute_unix_sec = int(rounder.floor_to_nearest(
        float(desired_time_unix_sec + max_time_offset_sec),
        MINUTES_TO_SECONDS))

    allowed_minutes_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_allowed_minute_unix_sec,
        end_time_unix_sec=last_allowed_minute_unix_sec,
        time_interval_sec=MINUTES_TO_SECONDS, include_endpoint=True
    ).astype(int)

    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name, data_source=data_source,
        height_m_asl=height_m_asl)

    # Glob for all candidate files within the allowed time window.
    raw_file_names = []
    for this_time_unix_sec in allowed_minutes_unix_sec:
        this_pathless_file_pattern = _get_pathless_raw_file_pattern(
            this_time_unix_sec)
        this_file_pattern = '{0:s}/{1:s}/{2:s}/{3:s}/{4:s}'.format(
            top_directory_name, spc_date_string[:4], spc_date_string,
            relative_directory_name, this_pathless_file_pattern)
        raw_file_names += glob.glob(this_file_pattern)

    file_times_unix_sec = []
    for this_raw_file_name in raw_file_names:
        file_times_unix_sec.append(raw_file_name_to_time(this_raw_file_name))

    # Pick the candidate nearest the desired time.
    if len(file_times_unix_sec):
        file_times_unix_sec = numpy.array(file_times_unix_sec)
        time_differences_sec = numpy.absolute(
            file_times_unix_sec - desired_time_unix_sec)
        nearest_index = numpy.argmin(time_differences_sec)
        min_time_diff_sec = time_differences_sec[nearest_index]
    else:
        min_time_diff_sec = numpy.inf

    if min_time_diff_sec > max_time_offset_sec:
        if raise_error_if_missing:
            desired_time_string = time_conversion.unix_sec_to_string(
                desired_time_unix_sec, TIME_FORMAT_FOR_LOG_MESSAGES)
            error_string = (
                'Could not find "{0:s}" file within {1:d} seconds of {2:s}.'
            ).format(field_name, max_time_offset_sec, desired_time_string)

            raise ValueError(error_string)

        return None

    return raw_file_names[nearest_index]
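# The core of the inexact-time search: among candidate files, pick the one
# whose valid time is nearest the desired time, but only if it falls within
# the allowed offset.  Isolated sketch of that selection step:

import numpy


def _demo_pick_nearest_file(file_names, file_times_unix_sec,
                            desired_time_unix_sec, max_time_offset_sec):
    """Returns the nearest-in-time file name, or None if none qualifies."""

    if len(file_names) == 0:
        return None

    time_differences_sec = numpy.absolute(
        numpy.array(file_times_unix_sec) - desired_time_unix_sec)
    nearest_index = numpy.argmin(time_differences_sec)

    if time_differences_sec[nearest_index] > max_time_offset_sec:
        return None

    return file_names[nearest_index]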
def find_polygon_files_for_spc_date(
        spc_date_unix_sec=None, top_raw_directory_name=None,
        tracking_scale_metres2=None, raise_error_if_missing=True):
    """Finds all polygon files for one SPC date.

    :param spc_date_unix_sec: SPC date.
    :param top_raw_directory_name: Name of top-level directory with raw
        segmotion files.
    :param tracking_scale_metres2: Tracking scale.
    :param raise_error_if_missing: If True and no files can be found, this
        method will raise an error.
    :return: polygon_file_names: 1-D list of paths to polygon files.
    """

    error_checking.assert_is_string(top_raw_directory_name)
    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)

    directory_name = '{0:s}/{1:s}'.format(
        top_raw_directory_name,
        _get_relative_polygon_dir_physical_scale(
            spc_date_string, tracking_scale_metres2))

    first_hour_unix_sec = SPC_DATE_START_HOUR * HOURS_TO_SECONDS + (
        time_conversion.string_to_unix_sec(
            spc_date_string, time_conversion.SPC_DATE_FORMAT))
    last_hour_unix_sec = SPC_DATE_END_HOUR * HOURS_TO_SECONDS + (
        time_conversion.string_to_unix_sec(
            spc_date_string, time_conversion.SPC_DATE_FORMAT))

    hours_in_spc_date_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_hour_unix_sec,
        end_time_unix_sec=last_hour_unix_sec,
        time_interval_sec=HOURS_TO_SECONDS, include_endpoint=True)

    polygon_file_names = []

    for this_hour_unix_sec in hours_in_spc_date_unix_sec:
        this_time_string_seconds = time_conversion.unix_sec_to_string(
            this_hour_unix_sec, TIME_FORMAT_IN_FILES)
        this_time_string_hours = time_conversion.unix_sec_to_string(
            this_hour_unix_sec, TIME_FORMAT_IN_FILES_HOUR_ONLY) + '*'

        # Find zipped files from this hour.
        this_pathless_file_name_zipped = _get_pathless_polygon_file_name(
            this_hour_unix_sec, zipped=True)
        this_pathless_file_pattern_zipped = (
            this_pathless_file_name_zipped.replace(
                this_time_string_seconds, this_time_string_hours))
        this_file_pattern_zipped = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_file_pattern_zipped)

        these_polygon_file_names_zipped = glob.glob(this_file_pattern_zipped)
        if these_polygon_file_names_zipped:
            polygon_file_names += these_polygon_file_names_zipped

        # Find unzipped files from this hour, keeping each one only if its
        # zipped counterpart was not already found.
        this_pathless_file_name_unzipped = _get_pathless_polygon_file_name(
            this_hour_unix_sec, zipped=False)
        this_pathless_file_pattern_unzipped = (
            this_pathless_file_name_unzipped.replace(
                this_time_string_seconds, this_time_string_hours))
        this_file_pattern_unzipped = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_file_pattern_unzipped)

        these_polygon_file_names_unzipped = glob.glob(
            this_file_pattern_unzipped)

        for this_file_name_unzipped in these_polygon_file_names_unzipped:
            this_file_name_zipped = (
                this_file_name_unzipped + GZIP_FILE_EXTENSION)
            if this_file_name_zipped in polygon_file_names:
                continue

            polygon_file_names.append(this_file_name_unzipped)

    if raise_error_if_missing and not polygon_file_names:
        raise ValueError(
            'Cannot find any polygon files in directory: ' + directory_name)

    polygon_file_names.sort()
    return polygon_file_names
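# The zipped/unzipped bookkeeping above keeps at most one entry per file: the
# compressed version if it exists, else the uncompressed version.  Equivalent
# logic in isolation (assuming GZIP_FILE_EXTENSION is '.gz'):

def _demo_dedupe_gzip(zipped_file_names, unzipped_file_names):
    """Merges two file lists, preferring the .gz version of each file."""

    polygon_file_names = list(zipped_file_names)

    for this_name in unzipped_file_names:
        if this_name + '.gz' in polygon_file_names:
            continue
        polygon_file_names.append(this_name)

    return polygon_file_names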
def _run(first_time_string, last_time_string, max_num_examples_per_time,
         pressure_level_mb, narr_predictor_names, dilation_distance_metres,
         class_fractions, num_half_rows, num_half_columns,
         normalization_type_string, top_frontal_grid_dir_name,
         top_narr_directory_name, narr_mask_file_name, output_dir_name,
         num_times_per_output_file):
    """Writes downsized 3-D training examples to files.

    This is effectively the main method.

    :param first_time_string: See documentation at top of file.
    :param last_time_string: Same.
    :param max_num_examples_per_time: Same.
    :param pressure_level_mb: Same.
    :param narr_predictor_names: Same.
    :param dilation_distance_metres: Same.
    :param class_fractions: Same.
    :param num_half_rows: Same.
    :param num_half_columns: Same.
    :param normalization_type_string: Same.
    :param top_frontal_grid_dir_name: Same.
    :param top_narr_directory_name: Same.
    :param narr_mask_file_name: Same.
    :param output_dir_name: Same.
    :param num_times_per_output_file: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_STEP_SECONDS)

    if narr_mask_file_name == '':
        narr_mask_matrix = None
    else:
        print 'Reading NARR mask from: "{0:s}"...'.format(narr_mask_file_name)
        narr_mask_matrix = ml_utils.read_narr_mask(narr_mask_file_name)
        print SEPARATOR_STRING

    error_checking.assert_is_greater(num_times_per_output_file, 0)
    num_target_times = len(target_times_unix_sec)

    this_example_dict = None
    this_first_time_unix_sec = target_times_unix_sec[0]

    for i in range(num_target_times):
        # Every `num_times_per_output_file` time steps, flush the accumulated
        # examples to a new file and start a fresh accumulation.
        if numpy.mod(i, num_times_per_output_file) == 0 and i != 0:
            if this_example_dict is not None:
                this_last_time_unix_sec = target_times_unix_sec[i - 1]
                this_output_file_name = (
                    trainval_io.find_downsized_3d_example_file(
                        top_directory_name=output_dir_name,
                        first_target_time_unix_sec=this_first_time_unix_sec,
                        last_target_time_unix_sec=this_last_time_unix_sec,
                        raise_error_if_missing=False))

                print 'Writing data to file: "{0:s}"...'.format(
                    this_output_file_name)
                trainval_io.write_downsized_3d_examples(
                    netcdf_file_name=this_output_file_name,
                    example_dict=this_example_dict,
                    narr_predictor_names=narr_predictor_names,
                    pressure_level_mb=pressure_level_mb,
                    dilation_distance_metres=dilation_distance_metres,
                    narr_mask_matrix=narr_mask_matrix)
                print SEPARATOR_STRING

            this_example_dict = None
            this_first_time_unix_sec = target_times_unix_sec[i]

        this_new_example_dict = (
            trainval_io.prep_downsized_3d_examples_to_write(
                target_time_unix_sec=target_times_unix_sec[i],
                max_num_examples=max_num_examples_per_time,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_half_rows,
                num_columns_in_half_grid=num_half_columns,
                normalization_type_string=normalization_type_string,
                narr_mask_matrix=narr_mask_matrix))
        print '\n'

        if this_new_example_dict is None:
            continue

        if this_example_dict is None:
            this_example_dict = copy.deepcopy(this_new_example_dict)
            continue

        # Concatenate new examples onto the accumulation, key by key.
        for this_key in trainval_io.MAIN_KEYS:
            this_example_dict[this_key] = numpy.concatenate(
                (this_example_dict[this_key],
                 this_new_example_dict[this_key]), axis=0)

    # Final flush: write any examples left over after the last full chunk.
    if this_example_dict is not None:
        this_last_time_unix_sec = target_times_unix_sec[-1]
        this_output_file_name = trainval_io.find_downsized_3d_example_file(
            top_directory_name=output_dir_name,
            first_target_time_unix_sec=this_first_time_unix_sec,
            last_target_time_unix_sec=this_last_time_unix_sec,
            raise_error_if_missing=False)

        print 'Writing data to file: "{0:s}"...'.format(this_output_file_name)
        trainval_io.write_downsized_3d_examples(
            netcdf_file_name=this_output_file_name,
            example_dict=this_example_dict,
            narr_predictor_names=narr_predictor_names,
            pressure_level_mb=pressure_level_mb,
            dilation_distance_metres=dilation_distance_metres,
            narr_mask_matrix=narr_mask_matrix)
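# The write pattern above is accumulate-and-flush: concatenate per-time
# example dicts along the sample axis, write a file every
# `num_times_per_output_file` time steps, and do one final flush at the end.
# A skeleton of the same pattern with hypothetical names (`example_dicts`,
# `write_fn`, `keys`, and `chunk_size` are stand-ins, not part of this
# script):

import numpy


def _demo_accumulate_and_flush(example_dicts, keys, chunk_size, write_fn):
    """example_dicts: list of dicts of numpy arrays, one dict per time step;
    write_fn: callback that writes one merged dict to file.
    """

    merged_dict = None

    for i, this_dict in enumerate(example_dicts):
        if merged_dict is None:
            # `+ 0` forces a copy, so later concatenation cannot alias the
            # caller's arrays.
            merged_dict = {k: this_dict[k] + 0 for k in keys}
        else:
            for k in keys:
                merged_dict[k] = numpy.concatenate(
                    (merged_dict[k], this_dict[k]), axis=0)

        if (i + 1) % chunk_size == 0:
            write_fn(merged_dict)
            merged_dict = None

    if merged_dict is not None:
        write_fn(merged_dict)  # final flush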