def resize_image(input_file_name, output_file_name, output_size_pixels,
                 convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Resizes image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param output_size_pixels: Output size.
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    # Validate all inputs before shelling out.
    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(output_size_pixels)
    error_checking.assert_is_greater(output_size_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    imagemagick_command = '"{0:s}" "{1:s}" -resize {2:d}@ "{3:s}"'.format(
        convert_exe_name, input_file_name, output_size_pixels,
        output_file_name)

    if os.system(imagemagick_command) != 0:
        raise ValueError(ERROR_STRING)
def trim_whitespace(input_file_name, output_file_name, border_width_pixels=10,
                    convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Trims whitespace around edge of image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param border_width_pixels: Desired border width (whitespace).
    :param convert_exe_name: Path to executable file for ImageMagick's
        "convert" function.  If you installed ImageMagick with root access,
        this should be the default.  Regardless, the pathless file name should
        be just "convert".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    # Validate all inputs before shelling out.
    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    imagemagick_command = (
        '"{0:s}" "{1:s}" -trim -bordercolor White -border {2:d} "{3:s}"'
    ).format(convert_exe_name, input_file_name, border_width_pixels,
             output_file_name)

    if os.system(imagemagick_command) != 0:
        raise ValueError(ERROR_STRING)
def gzip_file(input_file_name, output_file_name=None, delete_input_file=True):
    """Creates gzip archive with one file.

    :param input_file_name: Path to input file (will be gzipped).
    :param output_file_name: Path to output file (extension must be ".gz").
        If `output_file_name is None`, will simply append ".gz" to name of
        input file.
    :param delete_input_file: Boolean flag.  If True, will delete input file
        after gzipping.
    :raises: ValueError: if `output_file_name` does not end with ".gz".
    :raises: ValueError: if the Unix command fails.
    """
    error_checking.assert_file_exists(input_file_name)
    error_checking.assert_is_boolean(delete_input_file)

    if output_file_name is None:
        output_file_name = '{0:s}.gz'.format(input_file_name)
    elif not output_file_name.endswith('.gz'):
        raise ValueError(
            'Output file ("{0:s}") should have extension ".gz".'.format(
                output_file_name))

    # -c writes the archive to stdout, which is redirected to the output file.
    gzip_command = 'gzip -v -c "{0:s}" > "{1:s}"'.format(
        input_file_name, output_file_name)

    if os.system(gzip_command) != 0:
        raise ValueError('\nUnix command failed (log messages shown above '
                         'should explain why).')

    if delete_input_file:
        os.remove(input_file_name)
def read_field_from_full_grid_file(netcdf_file_name, field_name=None,
                                   metadata_dict=None,
                                   raise_error_if_fails=True):
    """Reads one radar field from full-grid (not sparse-grid) file.

    This file should contain all radar variables for one time step.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)
    H = number of height levels (unique grid-point heights)

    :param netcdf_file_name: Path to input file.
    :param field_name: Name of radar field.
    :param metadata_dict: Dictionary created by
        read_metadata_from_full_grid_file.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, will return None for all output variables.
    :return: field_matrix: H-by-M-by-N numpy array with values of radar field.
    :return: grid_point_heights_m_asl: length-H numpy array of height levels
        (integer metres above sea level).  If array is increasing (decreasing),
        height increases (decreases) with the first index of field_matrix.
    :return: grid_point_latitudes_deg: length-M numpy array of grid-point
        latitudes (deg N).  If array is increasing (decreasing), latitude
        increases (decreases) with the second index of field_matrix.
    :return: grid_point_longitudes_deg: length-N numpy array of grid-point
        longitudes (deg E).  If array is increasing (decreasing), longitude
        increases (decreases) with the third index of field_matrix.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None, None, None, None

    # Translate field name into the GridRad-native name used inside the file.
    field_name_orig = radar_utils.field_name_new_to_orig(
        field_name=field_name, data_source_name=radar_utils.GRIDRAD_SOURCE_ID)

    # First axis of the file variable is time; index 0 selects the single
    # time step in this file.
    field_matrix = numpy.array(
        netcdf_dataset.variables[field_name_orig][0, :, :, :])

    grid_point_latitudes_deg = numpy.array(
        netcdf_dataset.variables[LATITUDE_NAME_ORIG])
    grid_point_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        numpy.array(netcdf_dataset.variables[LONGITUDE_NAME_ORIG]))

    # Make sure grid in file agrees with grid described by metadata_dict.
    _check_grid_points(grid_point_latitudes_deg=grid_point_latitudes_deg,
                       grid_point_longitudes_deg=grid_point_longitudes_deg,
                       metadata_dict=metadata_dict)

    # Heights are stored in km (per the KM_TO_METRES conversion); convert to
    # integer metres.
    grid_point_heights_m_asl = KM_TO_METRES * numpy.array(
        netcdf_dataset.variables[HEIGHT_NAME_ORIG])
    grid_point_heights_m_asl = numpy.round(grid_point_heights_m_asl).astype(
        int)

    netcdf_dataset.close()
    return (field_matrix, grid_point_heights_m_asl, grid_point_latitudes_deg,
            grid_point_longitudes_deg)
def read_5minute_winds_from_raw_file(text_file_name, utc_offset_hours):
    """Reads 5-minute wind observations from raw file.

    This file should contain 5-minute METARs for one station-month (see
    download_5minute_file).

    :param text_file_name: Path to input file.
    :param utc_offset_hours: Difference between local station time and UTC
        (local minus UTC).
    :return: wind_table: pandas DataFrame with the following columns.
    wind_table.unix_time_sec: Observation time (seconds since 0000 UTC 1 Jan
        1970).
    wind_table.wind_speed_m_s01: Speed of sustained wind (m/s).
    wind_table.wind_direction_deg: Direction of sustained wind (degrees of
        origin -- i.e., direction that the wind is coming from -- as per
        meteorological convention).
    wind_table.wind_gust_speed_m_s01: Speed of wind gust (m/s).
    wind_table.wind_gust_direction_deg: Direction of wind gust (degrees of
        origin).
    """
    error_checking.assert_file_exists(text_file_name)
    error_checking.assert_is_not_nan(utc_offset_hours)

    unix_times_sec = []
    wind_speeds_m_s01 = []
    wind_directions_deg = []
    wind_gust_speeds_m_s01 = []
    wind_gust_directions_deg = []

    # Bug fix: the original opened the file without ever closing it.  The
    # context manager guarantees the handle is closed even if parsing fails.
    with open(text_file_name, 'r') as text_file_handle:
        for this_line in text_file_handle:
            # Local observation time occupies a fixed character range.
            this_local_time_string = this_line[
                LOCAL_TIME_CHAR_INDICES_5MINUTE_FILE[0]:
                LOCAL_TIME_CHAR_INDICES_5MINUTE_FILE[1]]

            this_time_unix_sec = _local_time_string_to_unix_sec(
                this_local_time_string, utc_offset_hours)

            (this_wind_speed_m_s01, this_wind_direction_deg,
             this_wind_gust_speed_m_s01, this_wind_gust_direction_deg
            ) = _parse_5minute_wind_from_line(this_line)

            unix_times_sec.append(this_time_unix_sec)
            wind_speeds_m_s01.append(this_wind_speed_m_s01)
            wind_directions_deg.append(this_wind_direction_deg)
            wind_gust_speeds_m_s01.append(this_wind_gust_speed_m_s01)
            wind_gust_directions_deg.append(this_wind_gust_direction_deg)

    wind_dict = {
        raw_wind_io.WIND_SPEED_COLUMN: wind_speeds_m_s01,
        raw_wind_io.WIND_DIR_COLUMN: wind_directions_deg,
        raw_wind_io.WIND_GUST_SPEED_COLUMN: wind_gust_speeds_m_s01,
        raw_wind_io.WIND_GUST_DIR_COLUMN: wind_gust_directions_deg,
        raw_wind_io.TIME_COLUMN: unix_times_sec
    }

    wind_table = pandas.DataFrame.from_dict(wind_dict)

    # Raw speeds are in knots (per KT_TO_METRES_PER_SECOND); convert to m/s.
    wind_table[raw_wind_io.WIND_SPEED_COLUMN] *= KT_TO_METRES_PER_SECOND
    wind_table[raw_wind_io.WIND_GUST_SPEED_COLUMN] *= KT_TO_METRES_PER_SECOND
    return _remove_invalid_wind_rows(wind_table)
def read_data_from_sparse_grid_file(netcdf_file_name, field_name_orig=None,
                                    data_source=None, sentinel_values=None,
                                    raise_error_if_fails=True):
    """Reads sparse radar grid from raw (either MYRORSS or MRMS) file.

    This file should contain one radar field at one height and one time step.

    :param netcdf_file_name: Path to input file.
    :param field_name_orig: Name of radar field in original (either MYRORSS or
        MRMS) format.
    :param data_source: Data source (either "myrorss" or "mrms").
    :param sentinel_values: 1-D numpy array of sentinel values.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, this method will return None.
    :return: sparse_grid_table: pandas DataFrame with the following columns.
        Each row corresponds to one grid cell.
    sparse_grid_table.grid_row: Row index.
    sparse_grid_table.grid_column: Column index.
    sparse_grid_table.<field_name>: Radar measurement (field_name is determined
        by the method `field_name_orig_to_new`).
    sparse_grid_table.num_grid_cells: Number of consecutive grid cells --
        starting at the current one and counting along rows first, columns
        second -- with the same radar measurement.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    error_checking.assert_is_numpy_array_without_nan(sentinel_values)
    error_checking.assert_is_numpy_array(sentinel_values, num_dimensions=1)

    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    field_name = _field_name_orig_to_new(field_name_orig,
                                         data_source=data_source)
    num_values = len(netcdf_dataset.variables[GRID_ROW_COLUMN_ORIG])

    if num_values == 0:
        # Empty grid: build empty columns so callers still get a DataFrame
        # with the expected schema.
        # NOTE(review): the measurement column is also created with dtype=int
        # here, although radar measurements elsewhere appear to be floats --
        # confirm downstream code tolerates this for empty tables.
        sparse_grid_dict = {
            GRID_ROW_COLUMN: numpy.array([], dtype=int),
            GRID_COLUMN_COLUMN: numpy.array([], dtype=int),
            NUM_GRID_CELL_COLUMN: numpy.array([], dtype=int),
            field_name: numpy.array([], dtype=int)}
    else:
        # [:] materializes each netCDF variable before the dataset is closed.
        sparse_grid_dict = {
            GRID_ROW_COLUMN: netcdf_dataset.variables[GRID_ROW_COLUMN_ORIG][:],
            GRID_COLUMN_COLUMN:
                netcdf_dataset.variables[GRID_COLUMN_COLUMN_ORIG][:],
            NUM_GRID_CELL_COLUMN:
                netcdf_dataset.variables[NUM_GRID_CELL_COLUMN_ORIG][:],
            field_name: netcdf_dataset.variables[field_name_orig][:]}

    netcdf_dataset.close()
    sparse_grid_table = pandas.DataFrame.from_dict(sparse_grid_dict)
    return _remove_sentinels_from_sparse_grid(
        sparse_grid_table, field_name, sentinel_values)
def read_stats_from_xml(xml_file_name, spc_date_unix_sec=None):
    """Reads storm statistics from XML file.

    :param xml_file_name: Path to input file.
    :param spc_date_unix_sec: SPC date in Unix format.
    :return: stats_table: pandas DataFrame with the following columns.
    stats_table.storm_id: String ID for storm cell.
    stats_table.east_velocity_m_s01: Eastward velocity (m/s).
    stats_table.north_velocity_m_s01: Northward velocity (m/s).
    stats_table.age_sec: Age of storm cell (seconds).
    """
    error_checking.assert_file_exists(xml_file_name)
    xml_tree = _open_xml_file(xml_file_name)

    storm_dict = {}
    # State for the column currently being accumulated while walking the tree.
    this_column_name = None
    this_column_name_orig = None
    this_column_values = None

    for this_element in xml_tree.iter():
        if this_element.tag == 'datacolumn':
            # A new data column starts: flush the previous column (if it was
            # one we care about) into storm_dict.
            # NOTE(review): the final datacolumn in the file is only flushed
            # when another 'datacolumn' element follows it -- confirm the XML
            # always ends with a non-tracked column, otherwise the last
            # tracked column is silently dropped.
            if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
                storm_dict.update({this_column_name: this_column_values})

            this_column_name_orig = this_element.attrib['name']
            if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
                this_column_name = _xml_column_name_orig_to_new(
                    this_column_name_orig)
                this_column_values = []

            continue

        # Skip value elements belonging to columns we do not track.
        if this_column_name_orig not in XML_COLUMN_NAMES_ORIG:
            continue

        if this_column_name == tracking_io.STORM_ID_COLUMN:
            this_column_values.append(this_element.attrib['value'])
        elif this_column_name == tracking_io.NORTH_VELOCITY_COLUMN:
            # Sign is flipped here; presumably the file stores southward
            # velocity as positive -- confirm against the file format spec.
            this_column_values.append(-1 * float(this_element.attrib['value']))
        elif this_column_name == tracking_io.EAST_VELOCITY_COLUMN:
            this_column_values.append(float(this_element.attrib['value']))
        elif this_column_name == tracking_io.AGE_COLUMN:
            this_column_values.append(
                int(numpy.round(float(this_element.attrib['value']))))

    stats_table = pandas.DataFrame.from_dict(storm_dict)

    # Append SPC date to each storm ID so IDs are unique across dates.
    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    storm_ids = _append_spc_date_to_storm_ids(
        stats_table[tracking_io.STORM_ID_COLUMN].values, spc_date_string)
    stats_table = stats_table.assign(
        **{tracking_io.STORM_ID_COLUMN: storm_ids})

    return tracking_io.remove_rows_with_nan(stats_table)
def read_keras_model(hdf5_file_name):
    """Reads Keras model from HDF5 file.

    :param hdf5_file_name: Path to input file.
    :return: keras_model_object: Instance of `keras.models.Model`.
    """
    error_checking.assert_file_exists(hdf5_file_name)

    keras_model_object = load_model(
        hdf5_file_name, custom_objects=CUSTOM_OBJECT_DICT_FOR_READING_MODEL)
    return keras_model_object
def read_model(hdf5_file_name):
    """Reads model from HDF5 file.

    :param hdf5_file_name: Path to input file.
    :return: model_object: Instance of `keras.models.Model`.
    """
    error_checking.assert_file_exists(hdf5_file_name)

    model_object = keras.models.load_model(
        hdf5_file_name, custom_objects=PERFORMANCE_METRIC_DICT)
    return model_object
def read_processed_file(csv_file_name):
    """Reads wind observations from processed file.

    :param csv_file_name: Path to input file.
    :return: wind_table: See documentation for write_processed_file.
    """
    error_checking.assert_file_exists(csv_file_name)
    return pandas.read_csv(
        csv_file_name, header=0, usecols=WIND_COLUMNS,
        dtype=WIND_COLUMN_TYPE_DICT)
def read_metadata_from_full_grid_file(netcdf_file_name,
                                      raise_error_if_fails=True):
    """Reads metadata from full-grid (not sparse-grid) file.

    This file should contain all radar variables for one time step.

    :param netcdf_file_name: Path to input file.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, will return None.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['nw_grid_point_lat_deg']: Latitude (deg N) of
        northwesternmost grid point.
    metadata_dict['nw_grid_point_lng_deg']: Longitude (deg E) of
        northwesternmost grid point.
    metadata_dict['lat_spacing_deg']: Spacing (deg N) between adjacent rows.
    metadata_dict['lng_spacing_deg']: Spacing (deg E) between adjacent columns.
    metadata_dict['num_lat_in_grid']: Number of rows (unique grid-point
        latitudes).
    metadata_dict['num_lng_in_grid']: Number of columns (unique grid-point
        longitudes).
    metadata_dict['unix_time_sec']: Valid time.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    # Pull everything needed out of the file before closing it.
    latitudes_deg = numpy.array(netcdf_dataset.variables[LATITUDE_NAME_ORIG])
    longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        numpy.array(netcdf_dataset.variables[LONGITUDE_NAME_ORIG]))
    valid_time_unix_sec = _time_from_gridrad_to_unix(
        netcdf_dataset.variables[TIME_NAME_ORIG][0])
    netcdf_dataset.close()

    return {
        radar_utils.NW_GRID_POINT_LAT_COLUMN: numpy.max(latitudes_deg),
        radar_utils.NW_GRID_POINT_LNG_COLUMN: numpy.min(longitudes_deg),
        radar_utils.LAT_SPACING_COLUMN: numpy.mean(numpy.diff(latitudes_deg)),
        radar_utils.LNG_SPACING_COLUMN: numpy.mean(numpy.diff(longitudes_deg)),
        radar_utils.NUM_LAT_COLUMN: len(latitudes_deg),
        radar_utils.NUM_LNG_COLUMN: len(longitudes_deg),
        radar_utils.UNIX_TIME_COLUMN: valid_time_unix_sec
    }
def read_gridrad_stats_from_thea(csv_file_name):
    """Reads radar statistics created by GridRad software (file format by
    Thea).

    :param csv_file_name: Path to input file.
    :return: gridrad_statistic_table: pandas DataFrame with mandatory columns
        listed below.  Other column names come from the list
        `GRIDRAD_STATISTIC_NAMES`.
    gridrad_statistic_table.storm_number: Numeric ID (integer) for storm cell.
    gridrad_statistic_table.unix_time_sec: Valid time of storm object.
    """
    error_checking.assert_file_exists(csv_file_name)
    gridrad_statistic_table = pandas.read_csv(csv_file_name, header=0, sep=',')

    # Convert times from Thea's format to Unix format.
    unix_times_sec = numpy.array([
        time_conversion.string_to_unix_sec(s, GRIDRAD_TIME_FORMAT)
        for s in gridrad_statistic_table[TIME_NAME_GRIDRAD_ORIG].values
    ])
    gridrad_statistic_table = gridrad_statistic_table.assign(
        **{tracking_utils.TIME_COLUMN: unix_times_sec})

    # Keep only storm number, valid time, and the recognized statistics.
    columns_to_keep = GRIDRAD_STATISTIC_NAMES_ORIG + [
        STORM_NUMBER_NAME_GRIDRAD_ORIG, tracking_utils.TIME_COLUMN
    ]
    gridrad_statistic_table = gridrad_statistic_table[columns_to_keep]

    # Rename columns from Thea's names to this package's names.
    column_dict_old_to_new = {
        STORM_NUMBER_NAME_GRIDRAD_ORIG: STORM_NUMBER_NAME_GRIDRAD,
        ECHO_TOP_40DBZ_NAME_GRIDRAD_ORIG: ECHO_TOP_40DBZ_NAME_GRIDRAD,
        SPECTRUM_WIDTH_NAME_GRIDRAD_ORIG: SPECTRUM_WIDTH_NAME_GRIDRAD,
        MAX_DIVERGENCE_NAME_GRIDRAD_ORIG: MAX_DIVERGENCE_NAME_GRIDRAD,
        UPPER_LEVEL_DIVERGENCE_NAME_GRIDRAD_ORIG:
            UPPER_LEVEL_DIVERGENCE_NAME_GRIDRAD,
        LOW_LEVEL_CONVERGENCE_NAME_GRIDRAD_ORIG:
            LOW_LEVEL_CONVERGENCE_NAME_GRIDRAD,
        DIVERGENCE_AREA_NAME_GRIDRAD_ORIG: DIVERGENCE_AREA_NAME_GRIDRAD,
        MAX_ROTATION_NAME_GRIDRAD_ORIG: MAX_ROTATION_NAME_GRIDRAD,
        UPPER_LEVEL_ROTATION_NAME_GRIDRAD_ORIG:
            UPPER_LEVEL_ROTATION_NAME_GRIDRAD,
        LOW_LEVEL_ROTATION_NAME_GRIDRAD_ORIG: LOW_LEVEL_ROTATION_NAME_GRIDRAD
    }
    gridrad_statistic_table.rename(columns=column_dict_old_to_new,
                                   inplace=True)

    # Convert units of divergence/convergence.
    # NOTE(review): the convergence column is negated here -- presumably so
    # its sign matches the divergence convention; confirm against the GridRad
    # statistics spec.
    gridrad_statistic_table[LOW_LEVEL_CONVERGENCE_NAME_GRIDRAD] *= -1

    for this_name in GRIDRAD_DIVERGENCE_NAMES:
        gridrad_statistic_table[
            this_name] *= CONVERSION_RATIO_FOR_GRIDRAD_DIVERGENCE

    return gridrad_statistic_table
def read_station_metadata_from_processed_file(csv_file_name):
    """Reads metadata for weather stations from file.

    :param csv_file_name: Path to input file.
    :return: station_metadata_table: See documentation for
        write_station_metadata_to_processed_file.
    """
    error_checking.assert_file_exists(csv_file_name)

    station_metadata_table = pandas.read_csv(
        csv_file_name, header=0, usecols=STATION_METADATA_COLUMNS,
        dtype=STATION_METADATA_COLUMN_TYPE_DICT)
    return station_metadata_table
def unzip_1day_tar_file(tar_file_name, spc_date_string, top_target_dir_name, scales_to_extract_metres2): """Unzips tar file with segmotion output for one SPC date. :param tar_file_name: Path to input file. :param spc_date_string: SPC date (format "yyyymmdd"). :param top_target_dir_name: Name of top-level output directory. :param scales_to_extract_metres2: 1-D numpy array of tracking scales to extract. :return: target_directory_name: Path to output directory. This will be "<top_target_directory_name>/<yyyymmdd>", where <yyyymmdd> is the SPC date. """ # Verification. _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string) error_checking.assert_file_exists(tar_file_name) error_checking.assert_is_greater_numpy_array(scales_to_extract_metres2, 0) error_checking.assert_is_numpy_array(scales_to_extract_metres2, num_dimensions=1) scales_to_extract_metres2 = numpy.round(scales_to_extract_metres2).astype( int) num_scales_to_extract = len(scales_to_extract_metres2) directory_names_to_unzip = [] for j in range(num_scales_to_extract): this_relative_stats_dir_name = '{0:s}/{1:s}'.format( spc_date_string, _get_relative_stats_dir_physical_scale( scales_to_extract_metres2[j])) this_relative_polygon_dir_name = '{0:s}/{1:s}'.format( spc_date_string, _get_relative_polygon_dir_physical_scale( scales_to_extract_metres2[j])) directory_names_to_unzip.append( this_relative_stats_dir_name.replace(spc_date_string + '/', '')) directory_names_to_unzip.append( this_relative_polygon_dir_name.replace(spc_date_string + '/', '')) target_directory_name = '{0:s}/{1:s}/{2:s}'.format(top_target_dir_name, spc_date_string[:4], spc_date_string) unzipping.unzip_tar(tar_file_name, target_directory_name=target_directory_name, file_and_dir_names_to_unzip=directory_names_to_unzip) return target_directory_name
def open_netcdf(netcdf_file_name, raise_error_if_fails=False):
    """Attempts to open NetCDF file.

    Code for handling gzip files comes from jochen at the following
    StackOverflow page: https://stackoverflow.com/posts/45356133/revisions

    :param netcdf_file_name: Path to input file.
    :param raise_error_if_fails: Boolean flag.  If raise_error_if_fails = True
        and file cannot be opened, this method will throw an error.
    :return: netcdf_dataset: Instance of `NetCDF4.Dataset`, containing all
        data from the file.  If raise_error_if_fails = False and file could
        not be opened, this will be None.
    :raises: IOError: if file could not be opened and raise_error_if_fails =
        True.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    error_checking.assert_is_boolean(raise_error_if_fails)

    gzip_as_input = netcdf_file_name.endswith(GZIP_FILE_EXTENSION)

    if gzip_as_input:
        # Decompress to a named temporary file, since the netCDF library
        # needs a real file on disk.  delete=False because the file is
        # reopened by name below; it is removed manually once done.
        gzip_file_object = gzip.open(netcdf_file_name, 'rb')
        netcdf_temporary_file_object = tempfile.NamedTemporaryFile(
            delete=False)
        netcdf_file_name = netcdf_temporary_file_object.name

        success = False
        try:
            shutil.copyfileobj(gzip_file_object, netcdf_temporary_file_object)
            success = True
        except Exception:
            # Bug fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit when raise_error_if_fails=False.
            if raise_error_if_fails:
                raise
        finally:
            # Bug fix: close both handles on every path (original leaked them
            # when the copy failed and the error was re-raised).
            gzip_file_object.close()
            netcdf_temporary_file_object.close()

        if not success:
            os.remove(netcdf_file_name)
            return None

    try:
        netcdf_dataset = Dataset(netcdf_file_name)
    except IOError:
        if raise_error_if_fails:
            if gzip_as_input:
                os.remove(netcdf_file_name)
            raise

        netcdf_dataset = None

    # Remove the decompressed temp file; on POSIX this is safe even while the
    # dataset still holds the file open.
    if gzip_as_input:
        os.remove(netcdf_file_name)

    return netcdf_dataset
def read_data_from_full_grid_file(netcdf_file_name, metadata_dict,
                                  raise_error_if_fails=True):
    """Reads full radar grid from raw (either MYRORSS or MRMS) file.

    This file should contain one radar field at one height and one time step.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)

    :param netcdf_file_name: Path to input file.
    :param metadata_dict: Dictionary with metadata for NetCDF file, created by
        read_metadata_from_raw_file.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, this method will return None for all output variables.
    :return: field_matrix: M-by-N numpy array with values of radar field.
    :return: unique_grid_point_lat_deg: length-M numpy array of grid-point
        latitudes (deg N).  If array is increasing (decreasing), latitude
        increases (decreases) while traveling down the columns of
        field_matrix.
    :return: unique_grid_point_lng_deg: length-N numpy array of grid-point
        longitudes (deg E).  If array is increasing (decreasing), longitude
        increases (decreases) while traveling right across the rows of
        field_matrix.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None, None, None

    # NOTE(review): this grabs the netCDF Variable object (no [:] copy) and
    # closes the dataset before the data are used below -- confirm that the
    # values are materialized before close, otherwise later reads may fail.
    field_matrix = netcdf_dataset.variables[
        metadata_dict[FIELD_NAME_COLUMN_ORIG]]
    netcdf_dataset.close()

    # Southernmost latitude = NW-corner latitude minus (num rows - 1) spacings.
    min_latitude_deg = metadata_dict[NW_GRID_POINT_LAT_COLUMN] - (
        metadata_dict[LAT_SPACING_COLUMN] *
        (metadata_dict[NUM_LAT_COLUMN] - 1))

    unique_grid_point_lat_deg, unique_grid_point_lng_deg = (
        grids.get_latlng_grid_points(
            min_latitude_deg=min_latitude_deg,
            min_longitude_deg=metadata_dict[NW_GRID_POINT_LNG_COLUMN],
            lat_spacing_deg=metadata_dict[LAT_SPACING_COLUMN],
            lng_spacing_deg=metadata_dict[LNG_SPACING_COLUMN],
            num_rows=metadata_dict[NUM_LAT_COLUMN],
            num_columns=metadata_dict[NUM_LNG_COLUMN]))

    field_matrix = _remove_sentinels_from_full_grid(
        field_matrix, metadata_dict[SENTINEL_VALUE_COLUMN])

    # Flip rows (and reverse the latitude array to match) so that latitude
    # decreases down each column.
    return (numpy.flipud(field_matrix), unique_grid_point_lat_deg[::-1],
            unique_grid_point_lng_deg)
def concatenate_images(input_file_names, output_file_name, num_panel_rows,
                       num_panel_columns, border_width_pixels=50,
                       montage_exe_name=DEFAULT_MONTAGE_EXE_NAME):
    """Concatenates many images into one paneled image.

    :param input_file_names: 1-D list of paths to input files (may be in any
        format handled by ImageMagick).
    :param output_file_name: Path to output file.
    :param num_panel_rows: Number of rows in paneled image.
    :param num_panel_columns: Number of columns in paneled image.
    :param border_width_pixels: Border width (whitespace) around each pixel.
    :param montage_exe_name: Path to executable file for ImageMagick's
        "montage" function.  If you installed ImageMagick with root access,
        this should be the default.  Regardless, the pathless file name should
        be just "montage".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    # Validate all inputs before shelling out.
    error_checking.assert_is_numpy_array(numpy.array(input_file_names),
                                         num_dimensions=1)
    for this_file_name in input_file_names:
        error_checking.assert_file_exists(this_file_name)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(num_panel_rows)
    error_checking.assert_is_integer(num_panel_columns)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(montage_exe_name)

    # Panel grid must be big enough to hold all input images.
    error_checking.assert_is_geq(
        num_panel_rows * num_panel_columns, len(input_file_names))

    # Assemble the montage command from parts, then join with spaces.
    command_parts = ['"{0:s}" -mode concatenate -tile {1:d}x{2:d}'.format(
        montage_exe_name, num_panel_columns, num_panel_rows)]
    command_parts += ['"{0:s}"'.format(f) for f in input_file_names]
    command_parts.append(
        '-trim -bordercolor White -border {0:d} "{1:s}"'.format(
            border_width_pixels, output_file_name))

    if os.system(' '.join(command_parts)) != 0:
        raise ValueError(ERROR_STRING)
def create_gif(input_file_names, output_file_name, num_seconds_per_frame,
               resize_factor=0.5, convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Creates GIF from static images.

    :param input_file_names: 1-D list of paths to input files (static images).
    :param output_file_name: Path to output file (GIF).
    :param num_seconds_per_frame: Number of seconds per frame.
    :param resize_factor: Resize factor.  When creating GIF, each static image
        (frame) will be resized to q times its original size, where
        q = `resize_factor`.  This will affect only the GIF.  The images
        themselves, at locations specified in `input_file_names`, will not be
        changed.
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    # Validate all inputs before shelling out.
    error_checking.assert_is_string_list(input_file_names)
    error_checking.assert_is_numpy_array(numpy.array(input_file_names),
                                         num_dimensions=1)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_file_exists(convert_exe_name)
    error_checking.assert_is_greater(num_seconds_per_frame, 0.)
    error_checking.assert_is_leq(num_seconds_per_frame, 10.)
    error_checking.assert_is_geq(resize_factor, 0.2)
    error_checking.assert_is_leq(resize_factor, 1.)

    # ImageMagick expresses frame delay in centiseconds and resizing in
    # percent; clamp both to at least 1.
    delay_centiseconds = max(
        [int(numpy.round(100 * num_seconds_per_frame)), 1])
    resize_percentage = max([int(numpy.round(100 * resize_factor)), 1])

    quoted_input_files = ' '.join(
        ['"{0:s}"'.format(f) for f in input_file_names])
    command_string = '"{0:s}" -delay {1:d} {2:s} -resize {3:d}% "{4:s}"'.format(
        convert_exe_name, delay_centiseconds, quoted_input_files,
        resize_percentage, output_file_name)

    if os.system(command_string) != 0:
        raise ValueError(ERROR_STRING)
def read_data_from_full_grid_file(netcdf_file_name, metadata_dict,
                                  raise_error_if_fails=True):
    """Reads full radar grid from raw (either MYRORSS or MRMS) file.

    This file should contain one radar field at one height and valid time.

    :param netcdf_file_name: Path to input file.
    :param metadata_dict: Dictionary created by `read_metadata_from_raw_file`.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        read, this method will raise an error.  If False and file cannot be
        read, will return None for all output vars.
    :return: field_matrix: M-by-N numpy array with radar field.  Latitude
        increases while moving up each column, and longitude increases while
        moving right along each row.
    :return: grid_point_latitudes_deg: length-M numpy array of grid-point
        latitudes (deg N).  This array is monotonically decreasing.
    :return: grid_point_longitudes_deg: length-N numpy array of grid-point
        longitudes (deg E).  This array is monotonically increasing.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None, None, None

    # NOTE(review): this grabs the netCDF Variable object (no [:] copy) and
    # closes the dataset before the data are used below -- confirm that the
    # values are materialized before close, otherwise later reads may fail.
    field_matrix = netcdf_dataset.variables[
        metadata_dict[FIELD_NAME_COLUMN_ORIG]]
    netcdf_dataset.close()

    # Southernmost latitude = NW-corner latitude minus (num rows - 1) spacings.
    min_latitude_deg = metadata_dict[radar_utils.NW_GRID_POINT_LAT_COLUMN] - (
        metadata_dict[radar_utils.LAT_SPACING_COLUMN] *
        (metadata_dict[radar_utils.NUM_LAT_COLUMN] - 1))

    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        grids.get_latlng_grid_points(
            min_latitude_deg=min_latitude_deg,
            min_longitude_deg=metadata_dict[
                radar_utils.NW_GRID_POINT_LNG_COLUMN],
            lat_spacing_deg=metadata_dict[radar_utils.LAT_SPACING_COLUMN],
            lng_spacing_deg=metadata_dict[radar_utils.LNG_SPACING_COLUMN],
            num_rows=metadata_dict[radar_utils.NUM_LAT_COLUMN],
            num_columns=metadata_dict[radar_utils.NUM_LNG_COLUMN]))

    field_matrix = _remove_sentinels_from_full_grid(
        field_matrix, metadata_dict[radar_utils.SENTINEL_VALUE_COLUMN])

    # Flip rows (and reverse the latitude array to match) so that latitude
    # decreases down each column.
    return (numpy.flipud(field_matrix), grid_point_latitudes_deg[::-1],
            grid_point_longitudes_deg)
def read_file(dill_file_name):
    """Reads set of isotonic-regression models from Dill file.

    :param dill_file_name: Path to input file.
    :return: scalar_model_objects: See doc for `train_models`.
    :return: vector_model_object_matrix: Same.
    """
    error_checking.assert_file_exists(dill_file_name)

    # Bug fix: the original left the file handle open if either dill.load
    # raised; the context manager closes it on every path.
    with open(dill_file_name, 'rb') as dill_file_handle:
        scalar_model_objects = dill.load(dill_file_handle)
        vector_model_object_matrix = dill.load(dill_file_handle)

    return scalar_model_objects, vector_model_object_matrix
def read_station_metadata_from_raw_file(csv_file_name):
    """Reads metadata for Oklahoma Mesonet stations from raw file.

    This file is provided by the Oklahoma Mesonet and can be found here:
    www.mesonet.org/index.php/api/siteinfo/from_all_active_with_geo_fields/
    format/csv/

    :param csv_file_name: Path to input file.
    :return: station_metadata_table: pandas DataFrame with the following
        columns.
    station_metadata_table.station_id: String ID for station.
    station_metadata_table.station_name: Verbose name for station.
    station_metadata_table.latitude_deg: Latitude (deg N).
    station_metadata_table.longitude_deg: Longitude (deg E).
    station_metadata_table.elevation_m_asl: Elevation (metres above sea
        level).
    """
    error_checking.assert_file_exists(csv_file_name)

    station_metadata_table = pandas.read_csv(
        csv_file_name, header=0, sep=',',
        dtype={ELEVATION_COLUMN_ORIG: numpy.float64})
    station_metadata_table = station_metadata_table[ORIG_METADATA_COLUMN_NAMES]

    column_dict_old_to_new = {
        STATION_ID_COLUMN_IN_METADATA: raw_wind_io.STATION_ID_COLUMN,
        STATION_NAME_COLUMN_ORIG: raw_wind_io.STATION_NAME_COLUMN,
        LATITUDE_COLUMN_ORIG: raw_wind_io.LATITUDE_COLUMN,
        LONGITUDE_COLUMN_ORIG: raw_wind_io.LONGITUDE_COLUMN,
        ELEVATION_COLUMN_ORIG: raw_wind_io.ELEVATION_COLUMN
    }
    station_metadata_table.rename(columns=column_dict_old_to_new, inplace=True)
    station_metadata_table = _remove_invalid_metadata_rows(
        station_metadata_table)

    # Append data source to each station ID.  The original version mutated the
    # underlying numpy array in place (`.values[i] = ...`), which silently
    # depends on pandas returning a writable view; building a new column and
    # assigning it is explicit and matches the style used elsewhere in this
    # package (e.g. `read_stats_from_xml`).
    station_ids = [
        raw_wind_io.append_source_to_station_id(
            this_id, primary_source=raw_wind_io.OK_MESONET_DATA_SOURCE)
        for this_id in
        station_metadata_table[raw_wind_io.STATION_ID_COLUMN].values
    ]

    return station_metadata_table.assign(
        **{raw_wind_io.STATION_ID_COLUMN: station_ids})
def read_points(netcdf_file_name):
    """Reads human points of interest for one image from NetCDF file.

    :param netcdf_file_name: Path to input file.
    :return: point_dict: Dictionary with the following keys.
    point_dict['full_storm_id_string']: See input doc for `write_points`.
    point_dict['storm_time_unix_sec']: Same.
    point_dict['grid_row_by_point']: Same.
    point_dict['grid_column_by_point']: Same.
    point_dict['panel_row_by_point']: Same.
    point_dict['panel_column_by_point']: Same.
    """
    error_checking.assert_file_exists(netcdf_file_name)
    dataset_object = netCDF4.Dataset(netcdf_file_name)

    def _variable_to_array(variable_key, dtype):
        """Reads one netCDF variable into a numpy array of the given dtype."""
        return numpy.array(
            dataset_object.variables[variable_key][:], dtype=dtype)

    point_dict = {
        STORM_ID_KEY: str(getattr(dataset_object, STORM_ID_KEY)),
        STORM_TIME_KEY:
            int(numpy.round(getattr(dataset_object, STORM_TIME_KEY))),
        GRID_ROW_BY_POINT_KEY:
            _variable_to_array(GRID_ROW_BY_POINT_KEY, float),
        GRID_COLUMN_BY_POINT_KEY:
            _variable_to_array(GRID_COLUMN_BY_POINT_KEY, float),
        PANEL_ROW_BY_POINT_KEY:
            _variable_to_array(PANEL_ROW_BY_POINT_KEY, int),
        PANEL_COLUMN_BY_POINT_KEY:
            _variable_to_array(PANEL_COLUMN_BY_POINT_KEY, int)
    }

    dataset_object.close()

    # A dummy storm ID marks "no storm"; report it (and its time) as None.
    if point_dict[STORM_ID_KEY] == DUMMY_STORM_ID_STRING:
        point_dict[STORM_ID_KEY] = None
        point_dict[STORM_TIME_KEY] = None

    return point_dict
def read_keras_model(hdf5_file_name, assumed_class_frequencies):
    """Reads Keras model from HDF5 file.

    :param hdf5_file_name: Path to input file.
    :param assumed_class_frequencies: See documentation for
        `get_unet_with_2d_convolution`.
    :return: keras_model_object: Instance of `keras.models.Model`.
    """

    error_checking.assert_file_exists(hdf5_file_name)

    class_weight_dict = ml_utils.get_class_weight_dict(
        assumed_class_frequencies)

    # In Python 3, `dict.values()` returns a view, and `numpy.array` on a view
    # yields a 0-d object array, which breaks the reshape below.  Materialize
    # the values as a list first.
    class_weights = numpy.array(list(class_weight_dict.values()))
    class_weights = numpy.reshape(class_weights, (class_weights.size, 1))

    # The loss must be registered as a custom object, because it is a closure
    # over the class weights and cannot be deserialized by name alone.
    CUSTOM_OBJECT_DICT_FOR_READING_MODEL.update(
        {'loss': keras_losses.weighted_cross_entropy(class_weights)})

    return keras.models.load_model(
        hdf5_file_name, custom_objects=CUSTOM_OBJECT_DICT_FOR_READING_MODEL)
def capture_mouse_clicks(image_file_name, instruction_string=''):
    """This interactive method captures coordinates of human mouse clicks.

    N = number of mouse clicks

    :param image_file_name: Path to image file.  This method will display the
        image in a figure window and allow you to click on top.
    :param instruction_string: String with instructions for the user.
    :return: point_objects_pixel_coords: length-N list of points (instances of
        `shapely.geometry.Point`), each containing a click location in pixel
        coordinates.
    :return: num_pixel_rows: Number of pixel rows in the image.
    :return: num_pixel_columns: Number of pixel columns in the image.
    """

    error_checking.assert_file_exists(image_file_name)
    error_checking.assert_is_string(instruction_string)

    # PIL reports size as (width, height) = (columns, rows).
    image_matrix = Image.open(image_file_name)
    num_pixel_columns, num_pixel_rows = image_matrix.size

    # `figure_object` must be module-level so the click handler (a separate
    # function) can reference the same figure.
    global figure_object
    figure_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES)
    )[0]

    pyplot.imshow(image_matrix)
    pyplot.title(instruction_string)

    # `pyplot.show` blocks until the user closes the window; while it runs,
    # `_click_handler` records click coordinates in module-level lists
    # (presumably `x_coords_px` and `y_coords_px`, which are read below).
    connection_id = figure_object.canvas.mpl_connect(
        'button_press_event', _click_handler)
    pyplot.show()
    figure_object.canvas.mpl_disconnect(connection_id)

    point_objects_pixel_coords = [
        shapely.geometry.Point(this_x, this_y)
        for this_x, this_y in zip(x_coords_px, y_coords_px)
    ]

    return point_objects_pixel_coords, num_pixel_rows, num_pixel_columns
def read_field_from_grib_file(
        grib_file_name, field_name_grib1, num_grid_rows, num_grid_columns,
        sentinel_value=None, temporary_dir_name=None,
        wgrib_exe_name=WGRIB_EXE_NAME_DEFAULT,
        wgrib2_exe_name=WGRIB2_EXE_NAME_DEFAULT, raise_error_if_fails=True):
    """Reads field from grib file.

    One field = one variable at one time step.

    M = number of rows (unique y-coordinates or latitudes of grid points)
    N = number of columns (unique x-coordinates or longitudes of grid points)

    :param grib_file_name: Path to input file.
    :param field_name_grib1: Field name in grib1 format (example: 500-mb height
        is "HGT:500 mb").
    :param num_grid_rows: Number of rows expected in grid.
    :param num_grid_columns: Number of columns expected in grid.
    :param sentinel_value: Sentinel value (all instances will be replaced with
        NaN).
    :param temporary_dir_name: Name of temporary directory.  An intermediate
        text file will be stored here.
    :param wgrib_exe_name: Path to wgrib executable.
    :param wgrib2_exe_name: Path to wgrib2 executable.
    :param raise_error_if_fails: Boolean flag.  If the extraction fails and
        raise_error_if_fails = True, this method will error out.  If the
        extraction fails and raise_error_if_fails = False, this method will
        return None.
    :return: field_matrix: M-by-N numpy array with values of the given field.
        If the grid is regular in x-y coordinates, x increases towards the
        right (in the positive direction of the second axis), while y
        increases downward (in the positive direction of the first axis).  If
        the grid is regular in lat-long, replace "x" and "y" in the previous
        sentence with "long" and "lat," respectively.
    :raises: ValueError: if extraction fails and raise_error_if_fails = True.
    """

    # Error-checking.
    error_checking.assert_file_exists(grib_file_name)
    error_checking.assert_is_string(field_name_grib1)
    error_checking.assert_is_integer(num_grid_rows)
    error_checking.assert_is_greater(num_grid_rows, 0)
    error_checking.assert_is_integer(num_grid_columns)
    error_checking.assert_is_greater(num_grid_columns, 0)
    error_checking.assert_file_exists(wgrib_exe_name)
    error_checking.assert_file_exists(wgrib2_exe_name)
    error_checking.assert_is_boolean(raise_error_if_fails)
    if sentinel_value is not None:
        error_checking.assert_is_not_nan(sentinel_value)

    # Housekeeping.
    grib_file_type = file_name_to_type(grib_file_name)

    if temporary_dir_name is not None:
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=temporary_dir_name)
    temporary_file_name = tempfile.NamedTemporaryFile(
        dir=temporary_dir_name, delete=False).name

    # Extract field to temporary file.  The command greps the field's record
    # out of the inventory, then dumps the record as headerless text.
    if grib_file_type == GRIB1_FILE_TYPE:
        command_string = (
            '"{0:s}" "{1:s}" -s | grep -w "{2:s}" | "{0:s}" -i "{1:s}" '
            '-text -nh -o "{3:s}"'
        ).format(wgrib_exe_name, grib_file_name, field_name_grib1,
                 temporary_file_name)
    else:
        command_string = (
            '"{0:s}" "{1:s}" -s | grep -w "{2:s}" | "{0:s}" -i "{1:s}" '
            '-no_header -text "{3:s}"'
        ).format(wgrib2_exe_name, grib_file_name,
                 _field_name_grib1_to_grib2(field_name_grib1),
                 temporary_file_name)

    try:
        exit_code = subprocess.call(command_string, shell=True)
    except OSError as this_exception:
        os.remove(temporary_file_name)
        if raise_error_if_fails:
            raise

        warning_string = (
            '\n\n{0:s}\n\nCommand (shown above) failed (details shown below).'
            '\n\n{1:s}'
        ).format(command_string, str(this_exception))
        warnings.warn(warning_string)
        return None

    # A non-zero exit code also means the extraction failed; the original
    # code ignored it and failed later with a confusing loadtxt error.
    if exit_code != 0:
        os.remove(temporary_file_name)
        error_string = (
            '\n\n{0:s}\n\nCommand (shown above) failed with exit code {1:d}.'
        ).format(command_string, exit_code)

        if raise_error_if_fails:
            raise ValueError(error_string)

        warnings.warn(error_string)
        return None

    # Read field from temporary file.
    field_vector = numpy.loadtxt(temporary_file_name)
    os.remove(temporary_file_name)

    try:
        field_matrix = numpy.reshape(
            field_vector, (num_grid_rows, num_grid_columns))
    except ValueError as this_exception:
        if raise_error_if_fails:
            raise

        warning_string = (
            '\n\nnumpy.reshape failed (details shown below).\n\n{0:s}'
        ).format(str(this_exception))
        warnings.warn(warning_string)
        return None

    return _sentinel_value_to_nan(
        data_matrix=field_matrix, sentinel_value=sentinel_value)
def read_tornado_reports(csv_file_name):
    """Reads tornado reports from file.

    This file should contain all storm reports for one year.

    :param csv_file_name: Path to input file.
    :return: tornado_table: pandas DataFrame with the following columns.
    tornado_table.start_time_unix_sec: Start time.
    tornado_table.end_time_unix_sec: End time.
    tornado_table.start_latitude_deg: Latitude (deg N) of start point.
    tornado_table.start_longitude_deg: Longitude (deg E) of start point.
    tornado_table.end_latitude_deg: Latitude (deg N) of end point.
    tornado_table.end_longitude_deg: Longitude (deg E) of end point.
    tornado_table.fujita_rating: F-scale or EF-scale rating (integer from
        0...5).
    tornado_table.width_metres: Tornado width (metres).
    """

    error_checking.assert_file_exists(csv_file_name)
    storm_event_table = pandas.read_csv(csv_file_name, header=0, sep=',')

    # Keep only rows whose event type is tornado.
    is_tornado_flags = numpy.array([
        _is_event_tornado(this_string) for this_string in
        storm_event_table[EVENT_TYPE_COLUMN_ORIG].values
    ])
    rows_to_drop = numpy.where(numpy.invert(is_tornado_flags))[0]
    storm_event_table.drop(
        storm_event_table.index[rows_to_drop], axis=0, inplace=True)

    num_reports = len(storm_event_table.index)
    start_times_unix_sec = numpy.full(num_reports, -1, dtype=int)
    end_times_unix_sec = numpy.full(num_reports, -1, dtype=int)

    time_zone_strings = storm_event_table[TIME_ZONE_COLUMN_ORIG].values
    start_time_strings = storm_event_table[START_TIME_COLUMN_ORIG].values
    end_time_strings = storm_event_table[END_TIME_COLUMN_ORIG].values

    # Convert local times to Unix times.  Reports with an unrecognized time
    # zone keep the placeholder value of -1.
    for i in range(num_reports):
        this_utc_offset_hours = _time_zone_string_to_utc_offset(
            time_zone_strings[i])
        if numpy.isnan(this_utc_offset_hours):
            continue

        start_times_unix_sec[i] = _local_time_string_to_unix_sec(
            start_time_strings[i], this_utc_offset_hours)
        end_times_unix_sec[i] = _local_time_string_to_unix_sec(
            end_time_strings[i], this_utc_offset_hours)

    tornado_dict = {
        tornado_io.START_TIME_COLUMN: start_times_unix_sec,
        tornado_io.END_TIME_COLUMN: end_times_unix_sec,
        tornado_io.START_LAT_COLUMN:
            storm_event_table[START_LATITUDE_COLUMN_ORIG].values,
        tornado_io.START_LNG_COLUMN:
            storm_event_table[START_LONGITUDE_COLUMN_ORIG].values,
        tornado_io.END_LAT_COLUMN:
            storm_event_table[END_LATITUDE_COLUMN_ORIG].values,
        tornado_io.END_LNG_COLUMN:
            storm_event_table[END_LONGITUDE_COLUMN_ORIG].values,
        tornado_io.FUJITA_RATING_COLUMN:
            storm_event_table[TORNADO_RATING_COLUMN_ORIG].values,
        tornado_io.WIDTH_COLUMN:
            FEET_TO_METRES *
            storm_event_table[TORNADO_WIDTH_COLUMN_ORIG].values
    }

    tornado_table = pandas.DataFrame.from_dict(tornado_dict)
    return tornado_io.remove_invalid_reports(tornado_table)
def read_thunderstorm_wind_reports(csv_file_name):
    """Reads thunderstorm-wind reports from file.

    This file should contain all storm reports for one year.

    :param csv_file_name: Path to input file.
    :return: wind_table: pandas DataFrame with the following columns.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.unix_time_sec: Valid time.
    wind_table.u_wind_m_s01: u-component of wind (metres per second).
    wind_table.v_wind_m_s01: v-component of wind (metres per second).
    """

    error_checking.assert_file_exists(csv_file_name)
    storm_event_table = pandas.read_csv(csv_file_name, header=0, sep=',')

    # Keep only rows whose event type is thunderstorm wind.
    is_tstorm_wind_flags = numpy.array([
        _is_event_thunderstorm_wind(this_string) for this_string in
        storm_event_table[EVENT_TYPE_COLUMN_ORIG].values
    ])
    rows_to_drop = numpy.where(numpy.invert(is_tstorm_wind_flags))[0]
    storm_event_table.drop(
        storm_event_table.index[rows_to_drop], axis=0, inplace=True)

    num_reports = len(storm_event_table.index)
    unix_times_sec = numpy.full(num_reports, -1, dtype=int)

    time_zone_strings = storm_event_table[TIME_ZONE_COLUMN_ORIG].values
    start_time_strings = storm_event_table[START_TIME_COLUMN_ORIG].values

    # Convert local times to Unix times.  Reports with an unrecognized time
    # zone keep the placeholder value of -1.
    for i in range(num_reports):
        this_utc_offset_hours = _time_zone_string_to_utc_offset(
            time_zone_strings[i])
        if numpy.isnan(this_utc_offset_hours):
            continue

        unix_times_sec[i] = _local_time_string_to_unix_sec(
            start_time_strings[i], this_utc_offset_hours)

    # Reports carry only a speed; wind direction is set to the default.
    wind_speeds_m_s01 = (
        KT_TO_METRES_PER_SECOND *
        storm_event_table[WIND_SPEED_COLUMN_ORIG].values
    )
    wind_directions_deg = numpy.full(
        num_reports, raw_wind_io.WIND_DIR_DEFAULT_DEG)

    u_winds_m_s01, v_winds_m_s01 = raw_wind_io.speed_and_direction_to_uv(
        wind_speeds_m_s01, wind_directions_deg)

    station_ids = _create_fake_station_ids_for_wind(num_reports)

    wind_dict = {
        raw_wind_io.TIME_COLUMN: unix_times_sec,
        raw_wind_io.LATITUDE_COLUMN:
            storm_event_table[START_LATITUDE_COLUMN_ORIG].values,
        raw_wind_io.LONGITUDE_COLUMN:
            storm_event_table[START_LONGITUDE_COLUMN_ORIG].values,
        raw_wind_io.STATION_ID_COLUMN: station_ids,
        raw_wind_io.STATION_NAME_COLUMN: station_ids,
        raw_wind_io.ELEVATION_COLUMN: numpy.full(num_reports, numpy.nan),
        raw_wind_io.U_WIND_COLUMN: u_winds_m_s01,
        raw_wind_io.V_WIND_COLUMN: v_winds_m_s01
    }

    wind_table = pandas.DataFrame.from_dict(wind_dict)
    return _remove_invalid_wind_reports(wind_table)
def read_highs_and_lows(text_file_name):
    """Reads locations of high- and low-pressure centers.

    :param text_file_name: Path to input file (text file in WPC format).
    :return: high_low_table: pandas DataFrame with the following columns.  Each
        row is one high- or low-pressure center.
    high_low_table.system_type_string: Type of system (either "high" or
        "low").
    high_low_table.unix_time_sec: Valid time.
    high_low_table.latitude_deg: Latitude (deg N).
    high_low_table.longitude_deg: Longitude (deg E).
    """

    error_checking.assert_file_exists(text_file_name)
    valid_time_unix_sec = _file_name_to_valid_time(text_file_name)

    system_type_strings = []
    latitudes_deg = numpy.array([], dtype=float)
    longitudes_deg = numpy.array([], dtype=float)

    # Use a context manager so the file handle is always closed (the original
    # code leaked it).
    with open(text_file_name, 'r') as text_file_handle:
        for this_line in text_file_handle.readlines():
            these_words = this_line.split()
            if len(these_words) == 0:
                continue

            # The first word identifies the system type; skip lines that
            # describe anything other than a high or low.
            this_system_type_string = these_words[0].lower()
            if this_system_type_string not in VALID_SYSTEM_TYPE_STRINGS:
                continue

            these_words = these_words[1:]
            these_latitudes_deg = []
            these_longitudes_deg = []

            for this_word in these_words:
                # Words shorter than 5 characters cannot encode a lat-long
                # pair.
                if len(this_word) < 5:
                    continue

                this_latitude_deg, this_longitude_deg = _string_to_latlng(
                    latlng_string=this_word, raise_error_if_fails=True)
                these_latitudes_deg.append(this_latitude_deg)
                these_longitudes_deg.append(this_longitude_deg)

            these_latitudes_deg = numpy.array(these_latitudes_deg)
            these_longitudes_deg = numpy.array(these_longitudes_deg)

            if numpy.any(numpy.isnan(these_latitudes_deg)):
                continue

            error_checking.assert_is_valid_lat_numpy_array(
                these_latitudes_deg)
            these_longitudes_deg = (
                lng_conversion.convert_lng_positive_in_west(
                    these_longitudes_deg, allow_nan=False)
            )

            system_type_strings += (
                [this_system_type_string] * len(these_latitudes_deg)
            )
            latitudes_deg = numpy.concatenate((
                latitudes_deg, these_latitudes_deg))
            longitudes_deg = numpy.concatenate((
                longitudes_deg, these_longitudes_deg))

    valid_times_unix_sec = numpy.full(
        len(system_type_strings), valid_time_unix_sec, dtype=int)

    high_low_dict = {
        SYSTEM_TYPE_COLUMN: system_type_strings,
        front_utils.TIME_COLUMN: valid_times_unix_sec,
        LATITUDE_COLUMN: latitudes_deg,
        LONGITUDE_COLUMN: longitudes_deg
    }

    return pandas.DataFrame.from_dict(high_low_dict)
def read_fronts_from_file(text_file_name):
    """Reads locations of warm and cold fronts from WPC bulletin.

    Input file should contain positions of cyclones, anticyclones, fronts,
    etc. for a single valid time.

    :param text_file_name: Path to input file (text file in WPC format).
    :return: front_table: pandas DataFrame with the following columns.  Each
        row is one front.
    front_table.front_type: Type of front (examples: "warm", "cold").
    front_table.unix_time_sec: Valid time.
    front_table.latitudes_deg: 1-D numpy array of latitudes (deg N) along
        front.
    front_table.longitudes_deg: 1-D numpy array of longitudes (deg E) along
        front.
    """

    error_checking.assert_file_exists(text_file_name)
    valid_time_unix_sec = _file_name_to_valid_time(text_file_name)

    front_types = []
    latitudes_2d_list_deg = []
    longitudes_2d_list_deg = []

    # Use a context manager so the file handle is always closed (the original
    # code leaked it).
    with open(text_file_name, 'r') as text_file_handle:
        for this_line in text_file_handle.readlines():
            these_words = this_line.split()

            # Need to skip empty lines.
            if not these_words:
                continue

            # The first word identifies the system type; skip lines that
            # describe anything other than a front.
            this_front_type = these_words[0].lower()
            if this_front_type not in VALID_FRONT_TYPES:
                continue

            these_words = these_words[1:]
            this_num_points = len(these_words)
            these_latitudes_deg = numpy.full(this_num_points, numpy.nan)
            these_longitudes_deg = numpy.full(this_num_points, numpy.nan)

            for i in range(this_num_points):
                these_latitudes_deg[i], these_longitudes_deg[i] = (
                    _string_to_latlng(these_words[i], False)
                )

            # Any NaN latitude means a point failed to parse; drop the whole
            # front.
            if numpy.any(numpy.isnan(these_latitudes_deg)):
                continue

            error_checking.assert_is_valid_lat_numpy_array(
                these_latitudes_deg)
            these_longitudes_deg = (
                lng_conversion.convert_lng_positive_in_west(
                    these_longitudes_deg, allow_nan=False)
            )

            front_types.append(this_front_type)
            latitudes_2d_list_deg.append(these_latitudes_deg)
            longitudes_2d_list_deg.append(these_longitudes_deg)

    num_fronts = len(front_types)
    valid_times_unix_sec = numpy.full(
        num_fronts, valid_time_unix_sec, dtype=int)

    front_dict = {
        front_utils.FRONT_TYPE_COLUMN: front_types,
        front_utils.TIME_COLUMN: valid_times_unix_sec,
        front_utils.LATITUDES_COLUMN: latitudes_2d_list_deg,
        front_utils.LONGITUDES_COLUMN: longitudes_2d_list_deg
    }

    return pandas.DataFrame.from_dict(front_dict)
def read_winds_from_raw_file(netcdf_file_name, secondary_source=None,
                             raise_error_if_fails=True):
    """Reads wind observations from raw file.

    This file should contain all fields for one secondary data source and one
    hour.

    :param netcdf_file_name: Path to input file.
    :param secondary_source: String ID for secondary data source.
    :param raise_error_if_fails: Boolean flag.  If True and the read fails,
        this method will raise an error.  If False and the read fails, this
        method will return None.
    :return: wind_table: If file cannot be opened and raise_error_if_fails =
        False, this is None.  Otherwise, it is a pandas DataFrame with the
        following columns.
    wind_table.station_id: String ID for station.
    wind_table.station_name: Verbose name for station.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.elevation_m_asl: Elevation (metres above sea level).
    wind_table.unix_time_sec: Observation time (seconds since 0000 UTC 1 Jan
        1970).
    wind_table.wind_speed_m_s01: Speed of sustained wind (m/s).
    wind_table.wind_direction_deg: Direction of sustained wind (degrees of
        origin -- i.e., direction that the wind is coming from -- as per
        meteorological convention).
    wind_table.wind_gust_speed_m_s01: Speed of wind gust (m/s).
    wind_table.wind_gust_direction_deg: Direction of wind gust (degrees of
        origin).
    """

    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(
        netcdf_file_name, raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    # TODO(thunderhoser): This is hacky (accounts for length-0 arrays of
    # station names).  Find a better way to handle this exception.
    try:
        station_names = _char_matrix_to_string_list(
            netcdf_dataset.variables[STATION_NAME_COLUMN_ORIG][:])
    except IndexError:
        # Close the dataset before bailing out; the original code leaked the
        # file handle here.
        netcdf_dataset.close()
        return None

    try:
        station_ids = _char_matrix_to_string_list(
            netcdf_dataset.variables[STATION_ID_COLUMN_ORIG][:])
    except KeyError:
        # Fall back on station names as IDs.  Copy the list: the loop below
        # rewrites IDs in place, and aliasing would corrupt `station_names`
        # too.
        station_ids = list(station_names)

    for i in range(len(station_ids)):
        station_ids[i] = raw_wind_io.append_source_to_station_id(
            station_ids[i], primary_source=raw_wind_io.MADIS_DATA_SOURCE,
            secondary_source=secondary_source)

    # Some MADIS files store valid time under a backup variable name.
    try:
        unix_times_sec = netcdf_dataset.variables[TIME_COLUMN_ORIG][:]
    except KeyError:
        unix_times_sec = netcdf_dataset.variables[TIME_COLUMN_ORIG_BACKUP][:]

    wind_speeds_m_s01 = netcdf_dataset.variables[WIND_SPEED_COLUMN_ORIG][:]
    wind_speed_quality_flags = netcdf_dataset.variables[
        WIND_SPEED_FLAG_COLUMN_ORIG][:]
    num_observations = len(wind_speeds_m_s01)

    # Each of the following variables may be missing from the file, in which
    # case it is filled with NaN (values) or the default quality flag.
    try:
        wind_directions_deg = netcdf_dataset.variables[WIND_DIR_COLUMN_ORIG][:]
        wind_dir_quality_flags = netcdf_dataset.variables[
            WIND_DIR_FLAG_COLUMN_ORIG][:]
    except KeyError:
        wind_directions_deg = numpy.full(num_observations, numpy.nan)
        wind_dir_quality_flags = [DEFAULT_QUALITY_FLAG] * num_observations

    try:
        wind_gust_speeds_m_s01 = netcdf_dataset.variables[
            WIND_GUST_SPEED_COLUMN_ORIG][:]
        wind_gust_speed_quality_flags = netcdf_dataset.variables[
            WIND_GUST_SPEED_FLAG_COLUMN_ORIG][:]
    except KeyError:
        wind_gust_speeds_m_s01 = numpy.full(num_observations, numpy.nan)
        wind_gust_speed_quality_flags = (
            [DEFAULT_QUALITY_FLAG] * num_observations
        )

    try:
        wind_gust_directions_deg = netcdf_dataset.variables[
            WIND_GUST_DIR_COLUMN_ORIG][:]
        wind_gust_dir_quality_flags = netcdf_dataset.variables[
            WIND_GUST_DIR_FLAG_COLUMN_ORIG][:]
    except KeyError:
        wind_gust_directions_deg = numpy.full(num_observations, numpy.nan)
        wind_gust_dir_quality_flags = (
            [DEFAULT_QUALITY_FLAG] * num_observations
        )

    wind_dict = {
        raw_wind_io.STATION_ID_COLUMN: station_ids,
        raw_wind_io.STATION_NAME_COLUMN: station_names,
        raw_wind_io.LATITUDE_COLUMN:
            netcdf_dataset.variables[LATITUDE_COLUMN_ORIG][:],
        raw_wind_io.LONGITUDE_COLUMN:
            netcdf_dataset.variables[LONGITUDE_COLUMN_ORIG][:],
        raw_wind_io.ELEVATION_COLUMN:
            netcdf_dataset.variables[ELEVATION_COLUMN_ORIG][:],
        raw_wind_io.TIME_COLUMN: numpy.array(unix_times_sec).astype(int),
        raw_wind_io.WIND_SPEED_COLUMN: wind_speeds_m_s01,
        raw_wind_io.WIND_DIR_COLUMN: wind_directions_deg,
        raw_wind_io.WIND_GUST_SPEED_COLUMN: wind_gust_speeds_m_s01,
        raw_wind_io.WIND_GUST_DIR_COLUMN: wind_gust_directions_deg,
        WIND_SPEED_FLAG_COLUMN: wind_speed_quality_flags,
        WIND_DIR_FLAG_COLUMN: wind_dir_quality_flags,
        WIND_GUST_SPEED_FLAG_COLUMN: wind_gust_speed_quality_flags,
        WIND_GUST_DIR_FLAG_COLUMN: wind_gust_dir_quality_flags
    }

    netcdf_dataset.close()
    wind_table = pandas.DataFrame.from_dict(wind_dict)
    wind_table = _remove_invalid_wind_rows(wind_table)
    return _remove_low_quality_data(wind_table)