def _get_online_file_name(unix_time_sec, secondary_source, protocol):
    """Generates expected file path on FTP or HTTP server.

    :param unix_time_sec: Valid time.
    :param secondary_source: String ID for secondary data source.
    :param protocol: Protocol (either "http" or "ftp").
    :return: online_file_name: Expected file path on FTP or HTTP server.
    :raises: ValueError: if `protocol` is neither "ftp" nor "http".
    """

    # Resolve the protocol first, so that an unrecognized value fails with a
    # clear message rather than an UnboundLocalError when the path is built.
    if protocol == 'ftp':
        top_directory_name = TOP_FTP_DIRECTORY_NAME
    elif protocol == 'http':
        top_directory_name = TOP_HTTP_DIRECTORY_NAME
    else:
        raise ValueError(
            'Protocol must be "ftp" or "http".  Got: {0!r}'.format(protocol)
        )

    pathless_file_name = _get_pathless_raw_file_name(unix_time_sec)

    # The two subdirectory names around the secondary source depend on whether
    # that source is listed in SECONDARY_SOURCES_IN_LDAD.
    if secondary_source in SECONDARY_SOURCES_IN_LDAD:
        first_subdir_name = 'LDAD'
        second_subdir_name = 'netCDF'
    else:
        first_subdir_name = 'point'
        second_subdir_name = 'netcdf'

    # Directory layout: top directory, three time-derived components (formats
    # TIME_FORMAT_YEAR, TIME_FORMAT_MONTH, TIME_FORMAT_DAY_OF_MONTH), then
    # first subdirectory / secondary source / second subdirectory.
    online_directory_name = '{0:s}/{1:s}/{2:s}/{3:s}/{4:s}/{5:s}/{6:s}'.format(
        top_directory_name,
        time_conversion.unix_sec_to_string(unix_time_sec, TIME_FORMAT_YEAR),
        time_conversion.unix_sec_to_string(unix_time_sec, TIME_FORMAT_MONTH),
        time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT_DAY_OF_MONTH),
        first_subdir_name, secondary_source, second_subdir_name
    )

    return '{0:s}/{1:s}'.format(online_directory_name, pathless_file_name)
def _match_one_time(source_time_unix_sec, target_times_unix_sec,
                    max_diff_seconds):
    """Finds the target time closest to one source time.

    :param source_time_unix_sec: Source time.
    :param target_times_unix_sec: 1-D numpy array of target times.
    :param max_diff_seconds: Max allowed absolute difference between the source
        time and the matched target time.
    :return: nearest_index: Index of nearest target time, i.e., the nearest
        target time is target_times_unix_sec[nearest_index].
    :raises: ValueError: if the nearest target time is more than
        `max_diff_seconds` away from `source_time_unix_sec`.
    """

    absolute_diffs_sec = numpy.absolute(
        source_time_unix_sec - target_times_unix_sec
    )
    smallest_diff_sec = numpy.min(absolute_diffs_sec)
    nearest_index = numpy.argmin(absolute_diffs_sec)

    if smallest_diff_sec <= max_diff_seconds:
        return nearest_index

    # No target time is close enough; report the closest one in the error.
    raise ValueError(
        (
            'Cannot find target time within {0:d} seconds of source time '
            '({1:s}). Nearest target time is {2:s}.'
        ).format(
            max_diff_seconds,
            time_conversion.unix_sec_to_string(
                source_time_unix_sec, TIME_FORMAT),
            time_conversion.unix_sec_to_string(
                target_times_unix_sec[nearest_index], TIME_FORMAT)
        )
    )
def _spc_dates_to_years(spc_date_strings):
    """Returns earliest and latest calendar years touched by some SPC dates.

    Both the start and the end time of every SPC date are considered.

    :param spc_date_strings: 1-D list of SPC dates (format "yyyymmdd").
    :return: first_year: First year.
    :return: last_year: Last year.
    """

    # Start times of all dates first, then end times of all dates.
    boundary_times_unix_sec = numpy.array(
        [time_conversion.get_start_of_spc_date(s) for s in spc_date_strings] +
        [time_conversion.get_end_of_spc_date(s) for s in spc_date_strings],
        dtype=int
    )

    boundary_years = numpy.array(
        [
            int(time_conversion.unix_sec_to_string(t, '%Y'))
            for t in boundary_times_unix_sec
        ],
        dtype=int
    )

    return numpy.min(boundary_years), numpy.max(boundary_years)
def _get_pathless_processed_file_name(start_time_unix_sec=None,
                                      end_time_unix_sec=None,
                                      primary_source=None,
                                      secondary_source=None):
    """Builds pathless name for processed wind file.

    :param start_time_unix_sec: Start time.
    :param end_time_unix_sec: End time.
    :param primary_source: String ID for primary data source.
    :param secondary_source: String ID for secondary data source.  Used only
        if `primary_source` equals MADIS_DATA_SOURCE.
    :return: pathless_processed_file_name: Pathless name for processed wind
        file.
    """

    if primary_source == MADIS_DATA_SOURCE:
        source_string = '{0:s}_{1:s}'.format(primary_source, secondary_source)
    else:
        # Underscores separate the parts of the file name, so swap them for
        # hyphens inside the source ID.
        source_string = primary_source.replace('_', '-')

    file_prefix = PROCESSED_FILE_PREFIX
    start_time_string = time_conversion.unix_sec_to_string(
        start_time_unix_sec, TIME_FORMAT_SECOND)
    end_time_string = time_conversion.unix_sec_to_string(
        end_time_unix_sec, TIME_FORMAT_SECOND)

    return '{0:s}_{1:s}_{2:s}_{3:s}{4:s}'.format(
        file_prefix, source_string, start_time_string, end_time_string,
        PROCESSED_FILE_EXTENSION
    )
def find_raw_files_one_day(top_directory_name, unix_time_sec, file_extension,
                           raise_error_if_all_missing=True):
    """Finds all raw (ASCII or JSON) files for one day.

    File names with the primary prefix (RAW_FILE_NAME_PREFIX) are searched
    first.  If none are found, names with the alternative prefix
    (ALT_RAW_FILE_NAME_PREFIX) are searched.

    :param top_directory_name: Name of top-level directory with raw probSevere
        files.
    :param unix_time_sec: Valid time (any time on the given day).
    :param file_extension: File type (either ".json" or ".ascii").
    :param raise_error_if_all_missing: Boolean flag.  If no files are found and
        raise_error_if_all_missing = True, this method will error out.  If no
        files are found and raise_error_if_all_missing = False, will return
        None.
    :return: raw_file_names: [may be None] 1-D list of paths to raw files.
    :raises: ValueError: if no files are found and
        raise_error_if_all_missing = True.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_all_missing)

    # Build a concrete file name for the given time, then swap its time stamp
    # for a glob pattern.
    example_pathless_file_name = _get_pathless_raw_file_name(
        unix_time_sec=unix_time_sec, file_extension=file_extension)
    example_time_string = time_conversion.unix_sec_to_string(
        unix_time_sec, RAW_FILE_TIME_FORMAT)

    primary_pathless_pattern = example_pathless_file_name.replace(
        example_time_string, RAW_FILE_TIME_FORMAT_REGEX)
    alt_pathless_pattern = primary_pathless_pattern.replace(
        RAW_FILE_NAME_PREFIX, ALT_RAW_FILE_NAME_PREFIX)

    directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name,
        time_conversion.unix_sec_to_string(unix_time_sec, MONTH_FORMAT),
        time_conversion.unix_sec_to_string(unix_time_sec, DATE_FORMAT)
    )

    raw_file_pattern = None

    for this_pathless_pattern in (
            primary_pathless_pattern, alt_pathless_pattern
    ):
        raw_file_pattern = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_pattern)

        these_file_names = glob.glob(raw_file_pattern)
        if these_file_names:
            return these_file_names

    if not raise_error_if_all_missing:
        return None

    # The error reports the last pattern tried (the alternative prefix).
    raise ValueError(
        'Cannot find any files with pattern: "{0:s}"'.format(raw_file_pattern)
    )
def download_1minute_file(station_id=None, month_unix_sec=None,
                          top_local_directory_name=None,
                          raise_error_if_fails=True):
    """Downloads file with 1-minute METARs for one station-month.

    Every candidate online ID returned by `_station_id_to_online` is tried in
    order, and the first successful download wins.

    :param station_id: String ID for station.
    :param month_unix_sec: Month in Unix format (sec since 0000 UTC 1 Jan 1970).
    :param top_local_directory_name: Top local directory with raw files
        containing 1-minute METARs.
    :param raise_error_if_fails: Boolean flag.  If True and download fails for
        all candidate station IDs, will raise error.
    :return: local_file_name: Path to downloaded file on local machine.  If
        download failed but raise_error_if_fails = False, local_file_name =
        None.
    :raises: ValueError: if file cannot be downloaded and
        raise_error_if_fails = True.
    """

    # Expected local path, used as the download target for every candidate.
    local_file_name = find_local_raw_1minute_file(
        station_id=station_id, month_unix_sec=month_unix_sec,
        top_directory_name=top_local_directory_name,
        raise_error_if_missing=False)

    year_string = time_conversion.unix_sec_to_string(
        month_unix_sec, TIME_FORMAT_YEAR)

    for this_station_id in _station_id_to_online(station_id):
        pathless_file_name = _get_pathless_raw_1minute_file_name(
            this_station_id, month_unix_sec)

        online_file_name = '{0:s}/{1:s}{2:s}/{3:s}'.format(
            TOP_FTP_DIR_NAME_1MINUTE, ONLINE_SUBDIR_PREFIX_1MINUTE,
            year_string, pathless_file_name)

        # Always ask the FTP helper NOT to raise.  Forwarding
        # `raise_error_if_fails` here would abort on the first failed
        # candidate, so later candidates would never be tried and the error
        # below would be unreachable.
        this_local_file_name = downloads.download_file_via_ftp(
            server_name=FTP_SERVER_NAME, user_name=FTP_USER_NAME,
            password=FTP_PASSWORD, ftp_file_name=online_file_name,
            local_file_name=local_file_name, raise_error_if_fails=False)

        if this_local_file_name is not None:
            return local_file_name

    if not raise_error_if_fails:
        return None

    error_string = (
        'Could not download file for station "{0:s}", month {1:s}.'
    ).format(
        station_id,
        time_conversion.unix_sec_to_string(month_unix_sec, TIME_FORMAT_MONTH)
    )

    raise ValueError(error_string)
def find_file(
        top_prediction_dir_name, first_init_time_unix_sec,
        last_init_time_unix_sec, gridded, raise_error_if_missing=False):
    """Locates file with gridded or ungridded predictions.

    :param top_prediction_dir_name: Name of top-level directory with prediction
        files.
    :param first_init_time_unix_sec: First initial time in file.  The "initial
        time" is the time of the storm object for which the prediction is being
        made.  This is different than the valid-time window (time range for
        which the prediction is valid).
    :param last_init_time_unix_sec: Last initial time in file.
    :param gridded: Boolean flag.  If True, will look for gridded file
        (extension ".p").  If False, will look for ungridded file (extension
        ".nc").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is missing
        and `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    # TODO(thunderhoser): Put lead time in file names.

    error_checking.assert_is_string(top_prediction_dir_name)
    error_checking.assert_is_integer(first_init_time_unix_sec)
    error_checking.assert_is_integer(last_init_time_unix_sec)
    error_checking.assert_is_geq(
        last_init_time_unix_sec, first_init_time_unix_sec)
    error_checking.assert_is_boolean(gridded)
    error_checking.assert_is_boolean(raise_error_if_missing)

    # The directory is determined by the SPC date of the first init time.
    spc_date_string = time_conversion.time_to_spc_date_string(
        first_init_time_unix_sec)

    if gridded:
        file_type_string, file_extension = 'gridded', '.p'
    else:
        file_type_string, file_extension = 'ungridded', '.nc'

    first_time_string = time_conversion.unix_sec_to_string(
        first_init_time_unix_sec, FILE_NAME_TIME_FORMAT)
    last_time_string = time_conversion.unix_sec_to_string(
        last_init_time_unix_sec, FILE_NAME_TIME_FORMAT)

    pathless_file_name = '{0:s}_predictions_{1:s}_{2:s}{3:s}'.format(
        file_type_string, first_time_string, last_time_string, file_extension)

    prediction_file_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
        top_prediction_dir_name, spc_date_string[:4], spc_date_string,
        pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(prediction_file_name):
        return prediction_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(prediction_file_name)
    )
def find_raw_file(top_directory_name, unix_time_sec, file_extension,
                  raise_error_if_missing=True):
    """Finds raw (either ASCII or JSON) file with probSevere data.

    This file should contain all storm-tracking data for one time step.

    Two candidate names are checked, in this order: one with the primary
    prefix (RAW_FILE_NAME_PREFIX) and one with the alternative prefix
    (ALT_RAW_FILE_NAME_PREFIX).  Both are checked regardless of
    `raise_error_if_missing`, so an existing alternative-prefix file is found
    even when the caller does not want an error.

    :param top_directory_name: Name of top-level directory with raw probSevere
        files.
    :param unix_time_sec: Valid time.
    :param file_extension: File type (either ".json" or ".ascii").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, will return expected path
        to raw file (with the primary prefix).
    :return: raw_file_name: Path or expected path to raw file.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(top_directory_name)
    _check_raw_file_extension(file_extension)
    error_checking.assert_is_boolean(raise_error_if_missing)

    pathless_file_name = _get_pathless_raw_file_name(
        unix_time_sec=unix_time_sec, file_extension=file_extension)

    directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name,
        time_conversion.unix_sec_to_string(unix_time_sec, MONTH_FORMAT),
        time_conversion.unix_sec_to_string(unix_time_sec, DATE_FORMAT)
    )

    raw_file_name = '{0:s}/{1:s}'.format(directory_name, pathless_file_name)
    if os.path.isfile(raw_file_name):
        return raw_file_name

    # Fall back to the alternative prefix.  Previously this fallback was
    # skipped whenever raise_error_if_missing = False, so callers got a
    # nonexistent primary path even though the alternative file existed.
    alt_raw_file_name = '{0:s}/{1:s}'.format(
        directory_name,
        pathless_file_name.replace(
            RAW_FILE_NAME_PREFIX, ALT_RAW_FILE_NAME_PREFIX)
    )
    if os.path.isfile(alt_raw_file_name):
        return alt_raw_file_name

    if not raise_error_if_missing:
        return raw_file_name

    error_string = (
        'Cannot find raw probSevere file. Expected at: "{0:s}"'
    ).format(raw_file_name)

    raise ValueError(error_string)
def check_time_separation(unix_times_sec, early_indices=None,
                          late_indices=None,
                          time_separation_sec=DEFAULT_TIME_SEPARATION_SEC):
    """Verifies that two sets of times are separated by a buffer.

    The separation is measured from the latest time in the early set to the
    earliest time in the late set.

    :param unix_times_sec: See documentation for _apply_time_separation.
    :param early_indices: See documentation for _apply_time_separation.
    :param late_indices: See documentation for _apply_time_separation.
    :param time_separation_sec: See documentation for _apply_time_separation.
    :raises: ValueError: if separation between sets is < `time_separation_sec`.
    """

    error_checking.assert_is_integer_numpy_array(unix_times_sec)
    error_checking.assert_is_numpy_array_without_nan(unix_times_sec)
    error_checking.assert_is_numpy_array(unix_times_sec, num_dimensions=1)

    max_valid_index = len(unix_times_sec) - 1

    # Both index arrays must be 1-D integer arrays that point into
    # `unix_times_sec`.  Early indices are validated before late ones.
    for these_indices in (early_indices, late_indices):
        error_checking.assert_is_integer_numpy_array(these_indices)
        error_checking.assert_is_numpy_array(these_indices, num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(these_indices, 0)
        error_checking.assert_is_leq_numpy_array(
            these_indices, max_valid_index)

    last_early_time_unix_sec = numpy.max(unix_times_sec[early_indices])

    # Every late time must come strictly after the last early time.
    error_checking.assert_is_greater_numpy_array(
        unix_times_sec[late_indices], last_early_time_unix_sec)

    error_checking.assert_is_integer(time_separation_sec)
    error_checking.assert_is_greater(time_separation_sec, 0)

    first_late_time_unix_sec = numpy.min(unix_times_sec[late_indices])
    min_diff_between_sets_sec = (
        first_late_time_unix_sec - last_early_time_unix_sec
    )

    if min_diff_between_sets_sec >= time_separation_sec:
        return

    raise ValueError(
        (
            'Last time in early set is {0:s}. First time in late set is '
            '{1:s}. This is a time separation of {2!s} seconds between sets. '
            'Required separation is >= {3!s} s.'
        ).format(
            time_conversion.unix_sec_to_string(
                last_early_time_unix_sec, TIME_STRING_FORMAT),
            time_conversion.unix_sec_to_string(
                first_late_time_unix_sec, TIME_STRING_FORMAT),
            min_diff_between_sets_sec, time_separation_sec
        )
    )
def find_many_files(directory_name, first_time_unix_sec, last_time_unix_sec,
                    raise_error_if_any_missing=True,
                    raise_error_if_all_missing=True, test_mode=False):
    """Finds many NetCDF files with RRTM data.

    One file is sought per calendar year, from the year of the first time to
    the year of the last time (inclusive).

    :param directory_name: Name of directory where files are expected.
    :param first_time_unix_sec: First time at which examples are desired.
    :param last_time_unix_sec: Last time at which examples are desired.
    :param raise_error_if_any_missing: Boolean flag.  Passed to `find_file` as
        `raise_error_if_missing` for each year.
    :param raise_error_if_all_missing: Boolean flag.  If all files are missing
        and `raise_error_if_all_missing == True`, will throw error.
    :param test_mode: Leave this alone.  If True, paths are kept without
        checking that the files exist.
    :return: rrtm_file_names: 1-D list of paths to example files.  This list
        does *not* contain expected paths to non-existent files.
    :raises: ValueError: if no files are found and
        `raise_error_if_all_missing == True`.
    """

    error_checking.assert_is_boolean(raise_error_if_any_missing)
    error_checking.assert_is_boolean(raise_error_if_all_missing)
    error_checking.assert_is_boolean(test_mode)

    start_year = int(
        time_conversion.unix_sec_to_string(first_time_unix_sec, '%Y')
    )
    end_year = int(
        time_conversion.unix_sec_to_string(last_time_unix_sec, '%Y')
    )

    num_years = end_year - start_year + 1
    years = numpy.linspace(start_year, end_year, num=num_years, dtype=int)

    candidate_file_names = [
        find_file(
            directory_name=directory_name, year=y,
            raise_error_if_missing=raise_error_if_any_missing
        )
        for y in years
    ]

    rrtm_file_names = [
        f for f in candidate_file_names if test_mode or os.path.isfile(f)
    ]

    if rrtm_file_names or not raise_error_if_all_missing:
        return rrtm_file_names

    raise ValueError(
        (
            'Cannot find any file in directory "{0:s}" from years {1:d}-{2:d}.'
        ).format(directory_name, start_year, end_year)
    )
def find_file(valid_time_unix_sec, top_directory_name,
              raise_error_if_missing=True):
    """Locates WPC-format text file on local machine.

    This file should contain positions of cyclones, anticyclones, fronts, etc.
    for a single valid time.

    :param valid_time_unix_sec: Valid time.
    :param top_directory_name: Name of top-level directory with WPC bulletins.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, this method will return the
        *expected* path to the file.
    :return: bulletin_file_name: Path to file.  If file is missing and
        raise_error_if_missing = False, this is the *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAME)

    # The subdirectory is the first four characters of the time string.
    subdirectory_name = time_string[:4]
    pathless_file_name = '{0:s}_{1:s}'.format(
        PATHLESS_FILE_NAME_PREFIX, time_string)

    bulletin_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, subdirectory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(bulletin_file_name):
        return bulletin_file_name

    raise ValueError(
        'Cannot find file. Expected at location: "{0:s}"'.format(
            bulletin_file_name)
    )
def _get_pathless_grib_file_names(init_time_unix_sec, model_name, grid_id=None,
                                  lead_time_hours=None):
    """Lists possible pathless file names for the given model/grid.

    One name is produced for each combination of file-name prefix and grib
    file type.

    :param init_time_unix_sec: Model-initialization time.
    :param model_name: See doc for `nwp_model_utils.check_grid_id`.
    :param grid_id: Same.
    :param lead_time_hours: Lead time (valid time minus init time).  Ignored
        (reset to 0) if the model is the NARR.
    :return: pathless_file_names: 1-D list of possible pathless file names.
    """

    prefixes = _get_pathless_file_name_prefixes(
        model_name=model_name, grid_id=grid_id)
    file_types = nwp_model_utils.get_grib_types(model_name)

    if model_name == nwp_model_utils.NARR_MODEL_NAME:
        lead_time_hours = 0

    # Prefix is the outer loop and file type is the inner loop.
    return [
        '{0:s}_{1:s}_{2:s}{3:s}'.format(
            p,
            time_conversion.unix_sec_to_string(
                init_time_unix_sec, TIME_FORMAT_HOUR),
            _lead_time_to_string(lead_time_hours),
            grib_io.file_type_to_extension(t)
        )
        for p in prefixes for t in file_types
    ]
def _get_pathless_grib_file_names(init_time_unix_sec, lead_time_hours=None,
                                  model_name=None, grid_id=None):
    """Lists possible pathless names for a grib file.

    One name is produced for each combination of file-name prefix and grib
    type.

    :param init_time_unix_sec: Initialization time.
    :param lead_time_hours: Lead time.  If the model is the NARR, this is reset
        to 0, so you can leave it as None.
    :param model_name: Name of model.
    :param grid_id: String ID for grid.
    :return: pathless_file_names: 1-D list of possible pathless file names.
    """

    nwp_model_utils.check_model_name(model_name)
    if model_name == nwp_model_utils.NARR_MODEL_NAME:
        lead_time_hours = 0

    prefixes = _get_prefixes_for_pathless_file_name(model_name, grid_id)
    grib_types = nwp_model_utils.get_grib_types(model_name)

    def _one_name(prefix, grib_type):
        """Builds pathless file name for one prefix and one grib type."""
        return '{0:s}_{1:s}_{2:s}{3:s}'.format(
            prefix,
            time_conversion.unix_sec_to_string(
                init_time_unix_sec, TIME_FORMAT_HOUR),
            _lead_time_to_string(lead_time_hours),
            grib_io.file_type_to_extension(grib_type)
        )

    pathless_file_names = []
    for this_prefix in prefixes:
        pathless_file_names.extend(
            [_one_name(this_prefix, t) for t in grib_types]
        )

    return pathless_file_names
def _get_pathless_single_field_file_name(init_time_unix_sec,
                                         lead_time_hours=None, model_name=None,
                                         grid_id=None, grib1_field_name=None):
    """Builds pathless name for file with a single field.

    "Single field" = one variable at one time step and all grid cells.

    :param init_time_unix_sec: Model-initialization time (Unix format).
    :param lead_time_hours: Lead time (valid time minus init time).  If the
        model is the NARR, this is reset to 0, so you can leave it as None.
    :param model_name: Name of model.
    :param grid_id: String ID for model grid.
    :param grib1_field_name: Field name in grib1 format.  Spaces are removed
        before the name is put in the file name.
    :return: pathless_file_name: Expected pathless file name.
    """

    nwp_model_utils.check_model_name(model_name)
    if model_name == nwp_model_utils.NARR_MODEL_NAME:
        lead_time_hours = 0

    # Only the first of the possible prefixes is used.
    file_name_prefix = _get_prefixes_for_pathless_file_name(
        model_name, grid_id
    )[0]

    init_time_string = time_conversion.unix_sec_to_string(
        init_time_unix_sec, TIME_FORMAT_HOUR)
    lead_time_string = _lead_time_to_string(lead_time_hours)
    field_name_no_spaces = grib1_field_name.replace(' ', '')

    return '{0:s}_{1:s}_{2:s}_{3:s}{4:s}'.format(
        file_name_prefix, init_time_string, lead_time_string,
        field_name_no_spaces, SINGLE_FIELD_FILE_EXTENSION
    )
def find_raw_file_on_local_machine(unix_time_sec=None,
                                   top_local_directory_name=None,
                                   raise_error_if_missing=True):
    """Builds path to raw file on local machine; optionally checks it exists.

    :param unix_time_sec: Time in Unix format.
    :param top_local_directory_name: Top-level directory with raw files on
        local machine.
    :param raise_error_if_missing: Boolean flag.  If raise_error_if_missing =
        True and file is missing, will raise error.
    :return: raw_local_file_name: Path on local machine.  If
        raise_error_if_missing = False and file is missing, this will be the
        *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    pathless_file_name = _get_pathless_raw_file_name(unix_time_sec)
    subdirectory_name = time_conversion.unix_sec_to_string(
        unix_time_sec, TIME_FORMAT_DATE)

    raw_local_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_local_directory_name, subdirectory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(raw_local_file_name):
        return raw_local_file_name

    error_string = (
        'Cannot find raw file. Expected at location: ' + raw_local_file_name
    )
    raise ValueError(error_string)
def _get_relative_processed_directory(data_source=None, spc_date_unix_sec=None,
                                      unix_time_sec=None,
                                      tracking_scale_metres2=None):
    """Builds relative path for processed storm-tracking files.

    :param data_source: Data source (either "segmotion" or "probSevere").
    :param spc_date_unix_sec: SPC date in Unix format (used only if
        data_source equals SEGMOTION_SOURCE_ID).
    :param unix_time_sec: Valid time (used only if data_source does not equal
        SEGMOTION_SOURCE_ID).
    :param tracking_scale_metres2: Tracking scale.  Truncated to an integer
        for the directory name.
    :return: relative_processed_dir_name: Relative path for processed storm-
        tracking files.
    """

    use_spc_date = data_source == SEGMOTION_SOURCE_ID

    date_string = (
        time_conversion.time_to_spc_date_string(spc_date_unix_sec)
        if use_spc_date
        else time_conversion.unix_sec_to_string(unix_time_sec, DATE_FORMAT)
    )

    integer_scale_metres2 = int(tracking_scale_metres2)
    return '{0:s}/scale_{1:d}m2'.format(date_string, integer_scale_metres2)
def find_match_file(top_directory_name, valid_time_unix_sec,
                    raise_error_if_missing=False):
    """Locates match file.

    A "match file" matches storm objects in one dataset (e.g., MYRORSS or
    GridRad) to those in another dataset, at one time step.

    :param top_directory_name: Name of top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param raise_error_if_missing: See doc for `find_file`.
    :return: match_file_name: Path to match file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)

    # The first subdirectory is the first four characters of the SPC date.
    directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, spc_date_string[:4], spc_date_string)

    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, FILE_NAME_TIME_FORMAT)

    match_file_name = '{0:s}/storm-matches_{1:s}.p'.format(
        directory_name, valid_time_string)

    if not raise_error_if_missing or os.path.isfile(match_file_name):
        return match_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(match_file_name)
    )
def find_single_field_file(init_time_unix_sec, lead_time_hours=None,
                           model_name=None, grid_id=None,
                           grib1_field_name=None, top_directory_name=None,
                           raise_error_if_missing=True):
    """Finds file with single field on local machine.

    "Single field" = one variable at one time step and all grid cells.

    :param init_time_unix_sec: Model-initialization time (Unix format).
    :param lead_time_hours: Lead time (valid time minus init time).  If the
        model is the NARR, this is reset to 0, so you can leave it as None.
        Otherwise must be a non-negative integer.
    :param model_name: Name of model.
    :param grid_id: String ID for model grid.
    :param grib1_field_name: Field name in grib1 format.
    :param top_directory_name: Name of top-level directory with single-field
        files for the given model/grid combo.
    :param raise_error_if_missing: Boolean flag.  If True and file is missing,
        will raise an error.
    :return: single_field_file_name: Path to single-field file.  If file is
        missing but raise_error_if_missing = False, this will be the *expected*
        path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(grib1_field_name)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)
    nwp_model_utils.check_model_name(model_name)

    if model_name == nwp_model_utils.NARR_MODEL_NAME:
        lead_time_hours = 0

    error_checking.assert_is_integer(lead_time_hours)
    error_checking.assert_is_geq(lead_time_hours, 0)

    pathless_file_name = _get_pathless_single_field_file_name(
        init_time_unix_sec, lead_time_hours=lead_time_hours,
        model_name=model_name, grid_id=grid_id,
        grib1_field_name=grib1_field_name)

    subdirectory_name = time_conversion.unix_sec_to_string(
        init_time_unix_sec, TIME_FORMAT_MONTH)

    single_field_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, subdirectory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(single_field_file_name):
        return single_field_file_name

    error_string = (
        'Cannot find single-field file. Expected at: ' + single_field_file_name
    )
    raise ValueError(error_string)
def _get_pathless_raw_file_name(unix_time_sec, zipped=True):
    """Builds pathless name for raw file.

    :param unix_time_sec: Valid time.
    :param zipped: Boolean flag.  If True, will generate name for zipped file
        (unzipped extension followed by zipped extension).  If False, will
        generate name for unzipped file.
    :return: pathless_raw_file_name: Pathless name for raw file.
    """

    time_string = time_conversion.unix_sec_to_string(
        unix_time_sec, TIME_FORMAT_SECONDS)

    extensions = [UNZIPPED_FILE_EXTENSION]
    if zipped:
        extensions.append(ZIPPED_FILE_EXTENSION)

    if len(extensions) == 2:
        return '{0:s}{1:s}{2:s}'.format(time_string, *extensions)

    return '{0:s}{1:s}'.format(time_string, *extensions)
def _plot_one_tornado(tornado_table, axes_object):
    """Plots the track of one tornado.

    Draws a marker at the first and last points in the table, a dotted line
    joining them, and a time label placed 0.05 deg of latitude below each of
    the two points.

    :param tornado_table: pandas DataFrame with points on one tornado track.
        See `linkage._read_input_tornado_reports` for expected columns.
    :param axes_object: Will plot on these axes (instance of
        `matplotlib.axes._subplots.AxesSubplot`).
    """

    def _first_and_last(column_name):
        """Returns first and last values in one column of the table."""
        return tornado_table[column_name].values[[0, -1]]

    latitudes_deg = _first_and_last(linkage.EVENT_LATITUDE_COLUMN)
    longitudes_deg = _first_and_last(linkage.EVENT_LONGITUDE_COLUMN)

    valid_time_strings = [
        time_conversion.unix_sec_to_string(t, TORNADO_TIME_FORMAT)
        for t in _first_and_last(linkage.EVENT_TIME_COLUMN)
    ]

    # (index into endpoint arrays, marker type, marker size, edge width).
    # The start point is drawn first, then the end point.
    endpoint_marker_specs = (
        (
            0, TORNADO_START_MARKER_TYPE, TORNADO_START_MARKER_SIZE,
            TORNADO_START_MARKER_EDGE_WIDTH
        ),
        (
            -1, TORNADO_END_MARKER_TYPE, TORNADO_END_MARKER_SIZE,
            TORNADO_END_MARKER_EDGE_WIDTH
        )
    )

    for k, this_type, this_size, this_edge_width in endpoint_marker_specs:
        axes_object.plot(
            longitudes_deg[k], latitudes_deg[k], linestyle='None',
            marker=this_type, markersize=this_size,
            markeredgewidth=this_edge_width,
            markerfacecolor=TORNADO_MARKER_COLOUR,
            markeredgecolor=TORNADO_MARKER_COLOUR
        )

    # Dotted line from start point to end point.
    axes_object.plot(
        longitudes_deg, latitudes_deg, linestyle=':',
        linewidth=TORNADO_TRACK_WIDTH, color=TORNADO_TRACK_COLOUR
    )

    # Time labels: start point first, then end point.
    for k in (0, -1):
        axes_object.text(
            longitudes_deg[k], latitudes_deg[k] - 0.05,
            valid_time_strings[k], fontsize=MAIN_FONT_SIZE,
            color=FONT_COLOUR, rotation=-90, bbox=TEXT_BOUNDING_BOX_DICT,
            horizontalalignment='center', verticalalignment='top',
            zorder=1e10
        )
def _get_pathless_raw_file_name(unix_time_sec):
    """Builds pathless name for raw MADIS file.

    The name is the time string (format TIME_FORMAT_HOUR) followed by
    RAW_FILE_EXTENSION.

    :param unix_time_sec: Time in Unix format.
    :return: pathless_raw_file_name: Pathless name for raw MADIS file.
    """

    time_string = time_conversion.unix_sec_to_string(
        unix_time_sec, TIME_FORMAT_HOUR)

    pathless_raw_file_name = '{0:s}{1:s}'.format(
        time_string, RAW_FILE_EXTENSION)

    return pathless_raw_file_name
def find_file_for_time_period(directory_name, file_type, start_time_unix_sec,
                              end_time_unix_sec, raise_error_if_missing=True):
    """Locates file with fronts for a contiguous time period.

    Specifically, this file should contain EITHER polylines or NARR grids,
    defining warm and cold fronts, at all 3-hour time steps in the given
    period.

    :param directory_name: Name of directory.
    :param file_type: Type of file (either "polylines" or "narr_grids").
    :param start_time_unix_sec: Start of contiguous time period.
    :param end_time_unix_sec: End of contiguous time period.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, this method will return the
        *expected* path to the file.
    :return: front_file_name: Path to file.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(directory_name)
    _check_file_type(file_type)
    error_checking.assert_is_boolean(raise_error_if_missing)

    pathless_prefix = (
        PATHLESS_PREFIX_FOR_POLYLINE_FILES
        if file_type == POLYLINE_FILE_TYPE
        else PATHLESS_PREFIX_FOR_GRIDDED_FILES
    )

    start_time_string = time_conversion.unix_sec_to_string(
        start_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)
    end_time_string = time_conversion.unix_sec_to_string(
        end_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)

    front_file_name = '{0:s}/{1:s}_{2:s}-{3:s}.p'.format(
        directory_name, pathless_prefix, start_time_string, end_time_string)

    if not raise_error_if_missing or os.path.isfile(front_file_name):
        return front_file_name

    raise ValueError(
        'Cannot find file. Expected at location: "{0:s}"'.format(
            front_file_name)
    )
def find_prediction_file(directory_name, first_valid_time_unix_sec,
                         last_valid_time_unix_sec, ensembled=False,
                         raise_error_if_missing=True):
    """Locates Pickle file with gridded predictions.

    :param directory_name: Name of directory.
    :param first_valid_time_unix_sec: First time in file.
    :param last_valid_time_unix_sec: Last time in file.
    :param ensembled: Boolean flag.  If True, will look for a file whose name
        starts with "ensembled_predictions".  If False, will look for a file
        whose name starts with "gridded_predictions".
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is missing
        and `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(first_valid_time_unix_sec)
    error_checking.assert_is_integer(last_valid_time_unix_sec)
    error_checking.assert_is_geq(
        last_valid_time_unix_sec, first_valid_time_unix_sec)
    error_checking.assert_is_boolean(ensembled)
    error_checking.assert_is_boolean(raise_error_if_missing)

    if ensembled:
        prediction_type_string = 'ensembled'
    else:
        prediction_type_string = 'gridded'

    first_time_string = time_conversion.unix_sec_to_string(
        first_valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)
    last_time_string = time_conversion.unix_sec_to_string(
        last_valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)

    prediction_file_name = '{0:s}/{1:s}_predictions_{2:s}-{3:s}.p'.format(
        directory_name, prediction_type_string, first_time_string,
        last_time_string)

    file_exists = os.path.isfile(prediction_file_name)
    if file_exists or not raise_error_if_missing:
        return prediction_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(prediction_file_name)
    )
def find_file_for_time_period(directory_name, field_name, pressure_level_mb,
                              start_time_unix_sec, end_time_unix_sec,
                              raise_error_if_missing=True):
    """Locates file with NARR data for a contiguous time period.

    Specifically, this file should contain grids for one variable, at one
    pressure level, at all 3-hour time steps in the given period.

    :param directory_name: Name of directory.
    :param field_name: Field name in GewitterGefahr format.
    :param pressure_level_mb: Pressure level (positive integer in millibars).
    :param start_time_unix_sec: Start of contiguous time period.
    :param end_time_unix_sec: End of contiguous time period.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, this method will return the
        *expected* path to the file.
    :return: narr_file_name: Path to file.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(directory_name)
    check_field_name(field_name, require_standard=False)
    error_checking.assert_is_integer(pressure_level_mb)
    error_checking.assert_is_greater(pressure_level_mb, 0)
    error_checking.assert_is_boolean(raise_error_if_missing)

    field_name_no_units = _remove_units_from_field_name(field_name)
    start_time_string = time_conversion.unix_sec_to_string(
        start_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)
    end_time_string = time_conversion.unix_sec_to_string(
        end_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)

    # Pressure level is zero-padded to four digits in the file name.
    narr_file_name = '{0:s}/{1:s}_{2:04d}mb_{3:s}-{4:s}.p'.format(
        directory_name, field_name_no_units, pressure_level_mb,
        start_time_string, end_time_string)

    if not raise_error_if_missing or os.path.isfile(narr_file_name):
        return narr_file_name

    raise ValueError(
        'Cannot find file. Expected at location: "{0:s}"'.format(
            narr_file_name)
    )
def find_file_for_one_time(top_directory_name, field_name, pressure_level_mb,
                           valid_time_unix_sec, raise_error_if_missing=True):
    """Builds (and optionally verifies) path to NARR file for one time step.

    Per the naming scheme, the file holds one variable at one pressure level
    and one time step.  The path has the form
    "<top_directory_name>/<month>/<field>_<pppp>mb_<valid time>.p", where
    <month> is the valid time rendered with `TIME_FORMAT_MONTH`, <field> is the
    field name with units removed and <pppp> is the pressure level zero-padded
    to 4 digits.

    :param top_directory_name: Name of top-level directory with processed NARR
        files.
    :param field_name: Field name in GewitterGefahr format (checked by
        `check_field_name` with `require_standard=False`).
    :param pressure_level_mb: Pressure level (positive integer in millibars).
    :param valid_time_unix_sec: Valid time (= initialization time for NARR).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, this method will return the
        *expected* path to the file.
    :return: narr_file_name: Path to file.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(top_directory_name)
    check_field_name(field_name, require_standard=False)
    error_checking.assert_is_integer(pressure_level_mb)
    error_checking.assert_is_greater(pressure_level_mb, 0)
    error_checking.assert_is_boolean(raise_error_if_missing)

    month_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_MONTH)
    unitless_field_name = _remove_units_from_field_name(field_name)
    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAMES)

    narr_file_name = '{0:s}/{1:s}/{2:s}_{3:04d}mb_{4:s}.p'.format(
        top_directory_name, month_string, unitless_field_name,
        pressure_level_mb, valid_time_string)

    # Skip the file-system check entirely when the caller only wants the
    # expected path.
    if not raise_error_if_missing:
        return narr_file_name

    if os.path.isfile(narr_file_name):
        return narr_file_name

    raise ValueError(
        'Cannot find file. Expected at location: "{0:s}"'.format(
            narr_file_name))
def find_target_file(top_directory_name, event_type_string, spc_date_string,
                     raise_error_if_missing=True, unix_time_sec=None):
    """Locates file with target values for either one time or one SPC date.

    If `unix_time_sec is None`, the file is for one SPC date and the path is
    "<top_directory_name>/<yyyy>/<event>_labels_<yyyymmdd>.nc".

    Otherwise the file is for one time and the path is
    "<top_directory_name>/<yyyy>/<yyyymmdd>/<event>_labels_<valid time>.nc".
    In this case the SPC date is derived from `unix_time_sec` and the input
    `spc_date_string` is ignored.

    <event> is "wind" if `event_type_string == linkage.WIND_EVENT_STRING` and
    "tornado" for every other accepted event type.

    :param top_directory_name: Name of top-level directory with target files.
    :param event_type_string: Event type (must be accepted by
        `linkage.check_event_type`).
    :param spc_date_string: SPC date (format "yyyymmdd").  Used only if
        `unix_time_sec is None`.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :param unix_time_sec: Valid time.  If None, will look for the one-SPC-date
        file instead of the one-time file.
    :return: target_file_name: Path to target file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    linkage.check_event_type(event_type_string)
    error_checking.assert_is_boolean(raise_error_if_missing)

    if event_type_string == linkage.WIND_EVENT_STRING:
        pathless_prefix = 'wind_labels'
    else:
        pathless_prefix = 'tornado_labels'

    if unix_time_sec is None:
        # Return value is deliberately discarded: the conversion is called
        # only to validate the date string (presumably it raises on a
        # malformed SPC date -- confirm in `time_conversion`).
        time_conversion.spc_date_string_to_unix_sec(spc_date_string)

        target_file_name = '{0:s}/{1:s}/{2:s}_{3:s}.nc'.format(
            top_directory_name, spc_date_string[:4], pathless_prefix,
            spc_date_string)
    else:
        spc_date_string = time_conversion.time_to_spc_date_string(
            unix_time_sec)
        valid_time_string = time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT)

        target_file_name = '{0:s}/{1:s}/{2:s}/{3:s}_{4:s}.nc'.format(
            top_directory_name, spc_date_string[:4], spc_date_string,
            pathless_prefix, valid_time_string)

    if raise_error_if_missing and not os.path.isfile(target_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            target_file_name)
        raise ValueError(error_string)

    return target_file_name
def _get_pathless_raw_file_name(unix_time_sec, zipped=True):
    """Generates pathless name for raw file.

    The name is the time (rendered with `TIME_FORMAT_SECONDS`) followed by
    `UNZIPPED_FILE_EXTENSION` and, for zipped files only,
    `ZIPPED_FILE_EXTENSION`.

    :param unix_time_sec: Time in Unix format.
    :param zipped: Boolean flag.  If zipped = True, will generate name of
        zipped file.  If zipped = False, will generate name of unzipped file.
    :return: pathless_raw_file_name: Pathless name for raw file.
    """

    time_string = time_conversion.unix_sec_to_string(
        unix_time_sec, TIME_FORMAT_SECONDS)

    if not zipped:
        return '{0:s}{1:s}'.format(time_string, UNZIPPED_FILE_EXTENSION)

    # Zipped files keep the unzipped extension and append the zipped one.
    return '{0:s}{1:s}{2:s}'.format(
        time_string, UNZIPPED_FILE_EXTENSION, ZIPPED_FILE_EXTENSION)
def find_processed_file(top_processed_dir_name, tracking_scale_metres2,
                        data_source, unix_time_sec, spc_date_string=None,
                        raise_error_if_missing=True):
    """Finds processed tracking file.

    This file should contain storm outlines and tracking statistics for one time
    step.  The path has the form
    "<top dir>/<yyyy>/<yyyymmdd>/scale_<scale>m2/<prefix>_<source>_<time><ext>",
    where <yyyymmdd> is the SPC date, <prefix> is
    `PREFIX_FOR_PATHLESS_FILE_NAMES`, <time> is rendered with
    `TIME_FORMAT_IN_FILE_NAMES` and <ext> is `FILE_EXTENSION`.

    :param top_processed_dir_name: See doc for
        `_check_input_args_for_file_finding`.
    :param tracking_scale_metres2: Same.
    :param data_source: Same.
    :param unix_time_sec: Valid time.
    :param spc_date_string: [used only if
        `data_source == tracking_utils.SEGMOTION_SOURCE_ID`, in which case it
        is required] SPC date (format "yyyymmdd").  For every other data
        source the SPC date is derived from `unix_time_sec` and this argument
        is ignored.
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method will error out.  If the
        file is missing and `raise_error_if_missing = False`, will return the
        *expected* path.
    :return: processed_file_name: Path to processed tracking file.
    :raises: TypeError: if `spc_date_string` is None but required (see above).
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    # The checker returns the tracking scale in the form used below (it is
    # formatted with "{:d}", so presumably an integer -- confirm in
    # `_check_input_args_for_file_finding`).
    tracking_scale_metres2 = _check_input_args_for_file_finding(
        top_processed_dir_name=top_processed_dir_name,
        tracking_scale_metres2=tracking_scale_metres2,
        data_source=data_source,
        raise_error_if_missing=raise_error_if_missing)

    if data_source == tracking_utils.SEGMOTION_SOURCE_ID:
        # Fail with a clear message instead of the opaque "'NoneType' object
        # is not subscriptable" that slicing None would produce below.  The
        # exception type (TypeError) is unchanged.
        if spc_date_string is None:
            error_string = (
                '`spc_date_string` is required when data source is "{0:s}".'
            ).format(data_source)
            raise TypeError(error_string)

        date_string = spc_date_string
    else:
        date_string = time_conversion.time_to_spc_date_string(unix_time_sec)

    processed_file_name = (
        '{0:s}/{1:s}/{2:s}/scale_{3:d}m2/{4:s}_{5:s}_{6:s}{7:s}'
    ).format(
        top_processed_dir_name, date_string[:4], date_string,
        tracking_scale_metres2, PREFIX_FOR_PATHLESS_FILE_NAMES, data_source,
        time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT_IN_FILE_NAMES),
        FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(processed_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            processed_file_name)
        raise ValueError(error_string)

    return processed_file_name
def find_raw_files_one_spc_date(spc_date_string, field_name, data_source,
                                top_directory_name, height_m_asl=None,
                                raise_error_if_missing=True):
    """Finds raw files for one field and one SPC date.

    Works by building the expected path for one sample time on the SPC date
    (via `find_raw_file`), turning the pathless file name into a glob pattern,
    and globbing the sample file's directory.

    :param spc_date_string: SPC date (format "yyyymmdd").
    :param field_name: Name of radar field in GewitterGefahr format.
    :param data_source: Data source (string).
    :param top_directory_name: Name of top-level directory with raw files.
    :param height_m_asl: Radar height (metres above sea level).
    :param raise_error_if_missing: Boolean flag.  If True and no files are
        found, will raise error.
    :return: raw_file_names: 1-D list of paths to raw files (in the order
        returned by `glob.glob`).
    :raises: ValueError: if raise_error_if_missing = True and no files are
        found.
    """

    error_checking.assert_is_boolean(raise_error_if_missing)

    sample_time_unix_sec = time_conversion.spc_date_string_to_unix_sec(
        spc_date_string)

    # Only the *expected* path is needed here, so the sample file itself is
    # allowed to be missing.
    sample_file_name = find_raw_file(
        unix_time_sec=sample_time_unix_sec, spc_date_string=spc_date_string,
        field_name=field_name, data_source=data_source,
        top_directory_name=top_directory_name, height_m_asl=height_m_asl,
        raise_error_if_missing=False)

    sample_directory_name = os.path.dirname(sample_file_name)
    sample_time_string = time_conversion.unix_sec_to_string(
        sample_time_unix_sec, TIME_FORMAT_SECONDS)

    # Swap the sample time for a pattern matching any time, then swap the
    # zipped extension for a wildcard.
    pathless_file_pattern = os.path.basename(sample_file_name).replace(
        sample_time_string, TIME_FORMAT_SECONDS_REGEX
    ).replace(ZIPPED_FILE_EXTENSION, '*')

    raw_file_pattern = '{0:s}/{1:s}'.format(
        sample_directory_name, pathless_file_pattern)
    raw_file_names = glob.glob(raw_file_pattern)

    if raw_file_names or not raise_error_if_missing:
        return raw_file_names

    raise ValueError(
        'Could not find any files with the following pattern: {0:s}'.format(
            raw_file_pattern))
def _get_pathless_raw_5minute_file_name(station_id, month_unix_sec):
    """Generates pathless name for raw 5-minute file.

    The name is the concatenation of `PATHLESS_FILE_NAME_PREFIX_5MINUTE`, the
    station ID, the month (rendered with `TIME_FORMAT_MONTH`) and
    `RAW_FILE_EXTENSION`, with no separators in between.

    :param station_id: String ID for station.
    :param month_unix_sec: Month in Unix format.
    :return: pathless_raw_file_name: Pathless name for raw 5-minute file.
    """

    month_string = time_conversion.unix_sec_to_string(
        month_unix_sec, TIME_FORMAT_MONTH)

    pathless_raw_file_name = '{0:s}{1:s}{2:s}{3:s}'.format(
        PATHLESS_FILE_NAME_PREFIX_5MINUTE, station_id, month_string,
        RAW_FILE_EXTENSION)

    return pathless_raw_file_name