Example #1
def resize_image(input_file_name,
                 output_file_name,
                 output_size_pixels,
                 convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Resizes image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param output_size_pixels: Output size.
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(output_size_pixels)
    error_checking.assert_is_greater(output_size_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    command_string = '"{0:s}" "{1:s}" -resize {2:d}@ "{3:s}"'.format(
        convert_exe_name, input_file_name, output_size_pixels,
        output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return

    raise ValueError(ERROR_STRING)
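A minimal usage sketch for resize_image follows. The import path and file names are placeholders rather than anything defined in this example; only the signature above is assumed.

# Hypothetical usage; module name and file paths are placeholders.
import imagemagick_utils  # assumed home of resize_image

# "-resize 1000000@" asks ImageMagick for roughly 10^6 total output pixels
# while preserving the aspect ratio.
imagemagick_utils.resize_image(
    input_file_name='original.png',
    output_file_name='resized.png',
    output_size_pixels=int(1e6))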
Example #2
def trim_whitespace(input_file_name,
                    output_file_name,
                    border_width_pixels=10,
                    convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Trims whitespace around edge of image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param border_width_pixels: Desired border width (whitespace).
    :param convert_exe_name: Path to executable file for ImageMagick's "convert"
        function.  If you installed ImageMagick with root access, this should be
        the default.  Regardless, the pathless file name should be just
        "convert".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    command_string = (
        '"{0:s}" "{1:s}" -trim -bordercolor White -border {2:d} "{3:s}"'
    ).format(convert_exe_name, input_file_name, border_width_pixels,
             output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return
    raise ValueError(ERROR_STRING)
Example #3
def gzip_file(input_file_name, output_file_name=None, delete_input_file=True):
    """Creates gzip archive with one file.

    :param input_file_name: Path to input file (will be gzipped).
    :param output_file_name: Path to output file (extension must be ".gz").  If
        `output_file_name is None`, will simply append ".gz" to name of input
        file.
    :param delete_input_file: Boolean flag.  If True, will delete input file
        after gzipping.
    :raises: ValueError: if `output_file_name` does not end with ".gz".
    :raises: ValueError: if the Unix command fails.
    """

    error_checking.assert_file_exists(input_file_name)
    error_checking.assert_is_boolean(delete_input_file)
    if output_file_name is None:
        output_file_name = '{0:s}.gz'.format(input_file_name)

    if not output_file_name.endswith('.gz'):
        error_string = (
            'Output file ("{0:s}") should have extension ".gz".'
        ).format(output_file_name)
        raise ValueError(error_string)

    unix_command_string = 'gzip -v -c "{0:s}" > "{1:s}"'.format(
        input_file_name, output_file_name)
    exit_code = os.system(unix_command_string)

    if exit_code != 0:
        raise ValueError('\nUnix command failed (log messages shown above '
                         'should explain why).')

    if delete_input_file:
        os.remove(input_file_name)
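gzip_file shells out to the Unix gzip utility. For comparison, a dependency-free variant using only the Python standard library might look like the sketch below; this is an illustration, not the module's own code.

import gzip
import os
import shutil


def gzip_file_pure_python(input_file_name, output_file_name=None,
                          delete_input_file=True):
    """Stdlib-only sketch of gzip_file (no call to the Unix gzip command)."""

    if output_file_name is None:
        output_file_name = '{0:s}.gz'.format(input_file_name)
    if not output_file_name.endswith('.gz'):
        raise ValueError(
            'Output file ("{0:s}") should have extension ".gz".'.format(
                output_file_name))

    with open(input_file_name, 'rb') as input_handle:
        with gzip.open(output_file_name, 'wb') as output_handle:
            shutil.copyfileobj(input_handle, output_handle)

    if delete_input_file:
        os.remove(input_file_name)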
Example #4
def read_field_from_full_grid_file(netcdf_file_name,
                                   field_name=None,
                                   metadata_dict=None,
                                   raise_error_if_fails=True):
    """Reads one radar field from full-grid (not sparse-grid) file.

    This file should contain all radar variables for one time step.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)
    H = number of height levels (unique grid-point heights)

    :param netcdf_file_name: Path to input file.
    :param field_name: Name of radar field.
    :param metadata_dict: Dictionary created by
        read_metadata_from_full_grid_file.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, will return None for all output variables.
    :return: field_matrix: H-by-M-by-N numpy array with values of radar field.
    :return: grid_point_heights_m_asl: length-H numpy array of height levels
        (integer metres above sea level).  If array is increasing (decreasing),
        height increases (decreases) with the first index of field_matrix.
    :return: grid_point_latitudes_deg: length-M numpy array of grid-point
        latitudes (deg N).  If array is increasing (decreasing), latitude
        increases (decreases) with the second index of field_matrix.
    :return: grid_point_longitudes_deg: length-N numpy array of grid-point
        longitudes (deg E).  If array is increasing (decreasing), longitude
        increases (decreases) with the third index of field_matrix.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None, None, None, None

    field_name_orig = radar_utils.field_name_new_to_orig(
        field_name=field_name, data_source_name=radar_utils.GRIDRAD_SOURCE_ID)
    field_matrix = numpy.array(
        netcdf_dataset.variables[field_name_orig][0, :, :, :])

    grid_point_latitudes_deg = numpy.array(
        netcdf_dataset.variables[LATITUDE_NAME_ORIG])
    grid_point_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        numpy.array(netcdf_dataset.variables[LONGITUDE_NAME_ORIG]))

    _check_grid_points(grid_point_latitudes_deg=grid_point_latitudes_deg,
                       grid_point_longitudes_deg=grid_point_longitudes_deg,
                       metadata_dict=metadata_dict)

    grid_point_heights_m_asl = KM_TO_METRES * numpy.array(
        netcdf_dataset.variables[HEIGHT_NAME_ORIG])
    grid_point_heights_m_asl = numpy.round(grid_point_heights_m_asl).astype(
        int)

    netcdf_dataset.close()
    return (field_matrix, grid_point_heights_m_asl, grid_point_latitudes_deg,
            grid_point_longitudes_deg)
Example #5
def read_5minute_winds_from_raw_file(text_file_name, utc_offset_hours):
    """Reads 5-minute wind observations from raw file.

    This file should contain 5-minute METARs for one station-month (see
    download_5minute_file).

    :param text_file_name: Path to input file.
    :param utc_offset_hours: Difference between local station time and UTC
        (local minus UTC).
    :return: wind_table: pandas DataFrame with the following columns.
    wind_table.unix_time_sec: Observation time (seconds since 0000 UTC 1 Jan
        1970).
    wind_table.wind_speed_m_s01: Speed of sustained wind (m/s).
    wind_table.wind_direction_deg: Direction of sustained wind (degrees of
        origin -- i.e., direction that the wind is coming from -- as per
        meteorological convention).
    wind_table.wind_gust_speed_m_s01: Speed of wind gust (m/s).
    wind_table.wind_gust_direction_deg: Direction of wind gust (degrees of
        origin).
    """

    error_checking.assert_file_exists(text_file_name)
    error_checking.assert_is_not_nan(utc_offset_hours)

    unix_times_sec = []
    wind_speeds_m_s01 = []
    wind_directions_deg = []
    wind_gust_speeds_m_s01 = []
    wind_gust_directions_deg = []

    for this_line in open(text_file_name, 'r').readlines():
        this_local_time_string = (
            this_line[LOCAL_TIME_CHAR_INDICES_5MINUTE_FILE[0]:
                      LOCAL_TIME_CHAR_INDICES_5MINUTE_FILE[1]])
        this_time_unix_sec = _local_time_string_to_unix_sec(
            this_local_time_string, utc_offset_hours)

        (this_wind_speed_m_s01, this_wind_direction_deg,
         this_wind_gust_speed_m_s01, this_wind_gust_direction_deg
         ) = _parse_5minute_wind_from_line(this_line)

        unix_times_sec.append(this_time_unix_sec)
        wind_speeds_m_s01.append(this_wind_speed_m_s01)
        wind_directions_deg.append(this_wind_direction_deg)
        wind_gust_speeds_m_s01.append(this_wind_gust_speed_m_s01)
        wind_gust_directions_deg.append(this_wind_gust_direction_deg)

    wind_dict = {
        raw_wind_io.WIND_SPEED_COLUMN: wind_speeds_m_s01,
        raw_wind_io.WIND_DIR_COLUMN: wind_directions_deg,
        raw_wind_io.WIND_GUST_SPEED_COLUMN: wind_gust_speeds_m_s01,
        raw_wind_io.WIND_GUST_DIR_COLUMN: wind_gust_directions_deg,
        raw_wind_io.TIME_COLUMN: unix_times_sec
    }

    wind_table = pandas.DataFrame.from_dict(wind_dict)
    wind_table[raw_wind_io.WIND_SPEED_COLUMN] *= KT_TO_METRES_PER_SECOND
    wind_table[raw_wind_io.WIND_GUST_SPEED_COLUMN] *= KT_TO_METRES_PER_SECOND
    return _remove_invalid_wind_rows(wind_table)
Example #6
def read_data_from_sparse_grid_file(netcdf_file_name, field_name_orig=None,
                                    data_source=None, sentinel_values=None,
                                    raise_error_if_fails=True):
    """Reads sparse radar grid from raw (either MYRORSS or MRMS) file.

    This file should contain one radar field at one height and one time step.

    :param netcdf_file_name: Path to input file.
    :param field_name_orig: Name of radar field in original (either MYRORSS or
        MRMS) format.
    :param data_source: Data source (either "myrorss" or "mrms").
    :param sentinel_values: 1-D numpy array of sentinel values.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, this method will return None.
    :return: sparse_grid_table: pandas DataFrame with the following columns.
        Each row corresponds to one grid cell.
    sparse_grid_table.grid_row: Row index.
    sparse_grid_table.grid_column: Column index.
    sparse_grid_table.<field_name>: Radar measurement (field_name is determined
        by the method `_field_name_orig_to_new`).
    sparse_grid_table.num_grid_cells: Number of consecutive grid cells --
        starting at the current one and counting along rows first, columns
        second -- with the same radar measurement.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    error_checking.assert_is_numpy_array_without_nan(sentinel_values)
    error_checking.assert_is_numpy_array(sentinel_values, num_dimensions=1)

    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    field_name = _field_name_orig_to_new(field_name_orig,
                                         data_source=data_source)
    num_values = len(netcdf_dataset.variables[GRID_ROW_COLUMN_ORIG])

    if num_values == 0:
        sparse_grid_dict = {
            GRID_ROW_COLUMN: numpy.array([], dtype=int),
            GRID_COLUMN_COLUMN: numpy.array([], dtype=int),
            NUM_GRID_CELL_COLUMN: numpy.array([], dtype=int),
            field_name: numpy.array([], dtype=int)}
    else:
        sparse_grid_dict = {
            GRID_ROW_COLUMN: netcdf_dataset.variables[GRID_ROW_COLUMN_ORIG][:],
            GRID_COLUMN_COLUMN:
                netcdf_dataset.variables[GRID_COLUMN_COLUMN_ORIG][:],
            NUM_GRID_CELL_COLUMN:
                netcdf_dataset.variables[NUM_GRID_CELL_COLUMN_ORIG][:],
            field_name: netcdf_dataset.variables[field_name_orig][:]}

    netcdf_dataset.close()
    sparse_grid_table = pandas.DataFrame.from_dict(sparse_grid_dict)
    return _remove_sentinels_from_sparse_grid(
        sparse_grid_table, field_name, sentinel_values)
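The num_grid_cells column is a run-length encoding. The sketch below shows one way to expand the returned table into a full 2-D grid, assuming runs proceed in row-major order as the docstring describes; the column names come from the docstring, but the expansion itself is illustrative rather than the library's own method.

import numpy


def sparse_to_full_grid_sketch(sparse_grid_table, field_name, num_grid_rows,
                               num_grid_columns):
    """Expands run-length-encoded sparse grid into a full 2-D grid (sketch)."""

    flat_matrix = numpy.full(num_grid_rows * num_grid_columns, numpy.nan)

    start_flat_indices = numpy.ravel_multi_index(
        (sparse_grid_table['grid_row'].values.astype(int),
         sparse_grid_table['grid_column'].values.astype(int)),
        (num_grid_rows, num_grid_columns))

    run_lengths = sparse_grid_table['num_grid_cells'].values.astype(int)
    field_values = sparse_grid_table[field_name].values

    for this_start, this_length, this_value in zip(
            start_flat_indices, run_lengths, field_values):
        flat_matrix[this_start:(this_start + this_length)] = this_value

    return numpy.reshape(flat_matrix, (num_grid_rows, num_grid_columns))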
Example #7
def read_stats_from_xml(xml_file_name, spc_date_unix_sec=None):
    """Reads storm statistics from XML file.

    :param xml_file_name: Path to input file.
    :param spc_date_unix_sec: SPC date in Unix format.
    :return: stats_table: pandas DataFrame with the following columns.
    stats_table.storm_id: String ID for storm cell.
    stats_table.east_velocity_m_s01: Eastward velocity (m/s).
    stats_table.north_velocity_m_s01: Northward velocity (m/s).
    stats_table.age_sec: Age of storm cell (seconds).
    """

    error_checking.assert_file_exists(xml_file_name)
    xml_tree = _open_xml_file(xml_file_name)

    storm_dict = {}
    this_column_name = None
    this_column_name_orig = None
    this_column_values = None

    for this_element in xml_tree.iter():
        if this_element.tag == 'datacolumn':
            if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
                storm_dict.update({this_column_name: this_column_values})

            this_column_name_orig = this_element.attrib['name']
            if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
                this_column_name = _xml_column_name_orig_to_new(
                    this_column_name_orig)
                this_column_values = []

            continue

        if this_column_name_orig not in XML_COLUMN_NAMES_ORIG:
            continue

        if this_column_name == tracking_io.STORM_ID_COLUMN:
            this_column_values.append(this_element.attrib['value'])
        elif this_column_name == tracking_io.NORTH_VELOCITY_COLUMN:
            this_column_values.append(-1 * float(this_element.attrib['value']))
        elif this_column_name == tracking_io.EAST_VELOCITY_COLUMN:
            this_column_values.append(float(this_element.attrib['value']))
        elif this_column_name == tracking_io.AGE_COLUMN:
            this_column_values.append(
                int(numpy.round(float(this_element.attrib['value']))))

    stats_table = pandas.DataFrame.from_dict(storm_dict)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    storm_ids = _append_spc_date_to_storm_ids(
        stats_table[tracking_io.STORM_ID_COLUMN].values, spc_date_string)

    stats_table = stats_table.assign(
        **{tracking_io.STORM_ID_COLUMN: storm_ids})
    return tracking_io.remove_rows_with_nan(stats_table)
Example #8
def read_keras_model(hdf5_file_name):
    """Reads Keras model from HDF5 file.

    :param hdf5_file_name: Path to input file.
    :return: keras_model_object: Instance of `keras.models.Model`.
    """

    error_checking.assert_file_exists(hdf5_file_name)
    return load_model(hdf5_file_name,
                      custom_objects=CUSTOM_OBJECT_DICT_FOR_READING_MODEL)
Example #9
def read_model(hdf5_file_name):
    """Reads model from HDF5 file.

    :param hdf5_file_name: Path to input file.
    :return: model_object: Instance of `keras.models.Model`.
    """

    error_checking.assert_file_exists(hdf5_file_name)
    return keras.models.load_model(hdf5_file_name,
                                   custom_objects=PERFORMANCE_METRIC_DICT)
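Both readers above depend on the custom_objects argument of keras.models.load_model, which maps the names stored in the HDF5 file to the custom loss and metric functions the model was compiled with. The sketch below illustrates the mechanism with a placeholder metric; the dictionary contents are not the modules' actual constants.

import keras
import keras.backend as K


def example_custom_metric(y_true, y_pred):
    """Placeholder metric, used only to illustrate custom_objects."""
    return K.mean(K.abs(y_true - y_pred))


EXAMPLE_CUSTOM_OBJECT_DICT = {'example_custom_metric': example_custom_metric}

# model_object = keras.models.load_model(
#     'model.h5', custom_objects=EXAMPLE_CUSTOM_OBJECT_DICT)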
Example #10
def read_processed_file(csv_file_name):
    """Reads wind observations from processed file.

    :param csv_file_name: Path to input file.
    :return: wind_table: See documentation for write_processed_file.
    """

    error_checking.assert_file_exists(csv_file_name)
    return pandas.read_csv(
        csv_file_name, header=0, usecols=WIND_COLUMNS,
        dtype=WIND_COLUMN_TYPE_DICT)
Example #11
def read_metadata_from_full_grid_file(netcdf_file_name,
                                      raise_error_if_fails=True):
    """Reads metadata from full-grid (not sparse-grid) file.

    This file should contain all radar variables for one time step.

    :param netcdf_file_name: Path to input file.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, will return None.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['nw_grid_point_lat_deg']: Latitude (deg N) of northwesternmost
        grid point.
    metadata_dict['nw_grid_point_lng_deg']: Longitude (deg E) of
        northwesternmost grid point.
    metadata_dict['lat_spacing_deg']: Spacing (deg N) between adjacent rows.
    metadata_dict['lng_spacing_deg']: Spacing (deg E) between adjacent columns.
    metadata_dict['num_lat_in_grid']: Number of rows (unique grid-point
        latitudes).
    metadata_dict['num_lng_in_grid']: Number of columns (unique grid-point
        longitudes).
    metadata_dict['unix_time_sec']: Valid time.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    grid_point_latitudes_deg = numpy.array(
        netcdf_dataset.variables[LATITUDE_NAME_ORIG])
    grid_point_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        numpy.array(netcdf_dataset.variables[LONGITUDE_NAME_ORIG]))

    metadata_dict = {
        radar_utils.NW_GRID_POINT_LAT_COLUMN:
        numpy.max(grid_point_latitudes_deg),
        radar_utils.NW_GRID_POINT_LNG_COLUMN:
        numpy.min(grid_point_longitudes_deg),
        radar_utils.LAT_SPACING_COLUMN:
        numpy.mean(numpy.diff(grid_point_latitudes_deg)),
        radar_utils.LNG_SPACING_COLUMN:
        numpy.mean(numpy.diff(grid_point_longitudes_deg)),
        radar_utils.NUM_LAT_COLUMN:
        len(grid_point_latitudes_deg),
        radar_utils.NUM_LNG_COLUMN:
        len(grid_point_longitudes_deg),
        radar_utils.UNIX_TIME_COLUMN:
        _time_from_gridrad_to_unix(netcdf_dataset.variables[TIME_NAME_ORIG][0])
    }

    netcdf_dataset.close()
    return metadata_dict
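This metadata reader is designed to feed read_field_from_full_grid_file (Example #4). A usage sketch of the two together follows; the module name, file path, and field name are placeholders.

# Hypothetical usage; module name, file path, and field name are placeholders.
import gridrad_io  # assumed home of the two full-grid readers

metadata_dict = gridrad_io.read_metadata_from_full_grid_file(
    'gridrad_example.nc')

(field_matrix, grid_point_heights_m_asl, grid_point_latitudes_deg,
 grid_point_longitudes_deg
) = gridrad_io.read_field_from_full_grid_file(
    'gridrad_example.nc', field_name='reflectivity_dbz',
    metadata_dict=metadata_dict)

# field_matrix is H-by-M-by-N (height by latitude by longitude).
print(field_matrix.shape)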
Example #12
def read_gridrad_stats_from_thea(csv_file_name):
    """Reads radar statistics created by GridRad software (file format by Thea).

    :param csv_file_name: Path to input file.
    :return: gridrad_statistic_table: pandas DataFrame with mandatory columns
        listed below.  Other column names come from the list
        `GRIDRAD_STATISTIC_NAMES`.
    gridrad_statistic_table.storm_number: Numeric ID (integer) for storm cell.
    gridrad_statistic_table.unix_time_sec: Valid time of storm object.
    """

    error_checking.assert_file_exists(csv_file_name)
    gridrad_statistic_table = pandas.read_csv(csv_file_name, header=0, sep=',')

    # Convert times from Thea's format to Unix format.
    unix_times_sec = numpy.array([
        time_conversion.string_to_unix_sec(s, GRIDRAD_TIME_FORMAT)
        for s in gridrad_statistic_table[TIME_NAME_GRIDRAD_ORIG].values
    ])
    gridrad_statistic_table = gridrad_statistic_table.assign(
        **{tracking_utils.TIME_COLUMN: unix_times_sec})

    columns_to_keep = GRIDRAD_STATISTIC_NAMES_ORIG + [
        STORM_NUMBER_NAME_GRIDRAD_ORIG, tracking_utils.TIME_COLUMN
    ]
    gridrad_statistic_table = gridrad_statistic_table[columns_to_keep]

    # Rename columns.
    column_dict_old_to_new = {
        STORM_NUMBER_NAME_GRIDRAD_ORIG: STORM_NUMBER_NAME_GRIDRAD,
        ECHO_TOP_40DBZ_NAME_GRIDRAD_ORIG: ECHO_TOP_40DBZ_NAME_GRIDRAD,
        SPECTRUM_WIDTH_NAME_GRIDRAD_ORIG: SPECTRUM_WIDTH_NAME_GRIDRAD,
        MAX_DIVERGENCE_NAME_GRIDRAD_ORIG: MAX_DIVERGENCE_NAME_GRIDRAD,
        UPPER_LEVEL_DIVERGENCE_NAME_GRIDRAD_ORIG:
        UPPER_LEVEL_DIVERGENCE_NAME_GRIDRAD,
        LOW_LEVEL_CONVERGENCE_NAME_GRIDRAD_ORIG:
        LOW_LEVEL_CONVERGENCE_NAME_GRIDRAD,
        DIVERGENCE_AREA_NAME_GRIDRAD_ORIG: DIVERGENCE_AREA_NAME_GRIDRAD,
        MAX_ROTATION_NAME_GRIDRAD_ORIG: MAX_ROTATION_NAME_GRIDRAD,
        UPPER_LEVEL_ROTATION_NAME_GRIDRAD_ORIG:
        UPPER_LEVEL_ROTATION_NAME_GRIDRAD,
        LOW_LEVEL_ROTATION_NAME_GRIDRAD_ORIG: LOW_LEVEL_ROTATION_NAME_GRIDRAD
    }

    gridrad_statistic_table.rename(columns=column_dict_old_to_new,
                                   inplace=True)

    # Convert units of divergence/convergence.
    gridrad_statistic_table[LOW_LEVEL_CONVERGENCE_NAME_GRIDRAD] *= -1
    for this_name in GRIDRAD_DIVERGENCE_NAMES:
        gridrad_statistic_table[
            this_name] *= CONVERSION_RATIO_FOR_GRIDRAD_DIVERGENCE

    return gridrad_statistic_table
Example #13
def read_station_metadata_from_processed_file(csv_file_name):
    """Reads metadata for weather stations from file.

    :param csv_file_name: Path to input file.
    :return: station_metadata_table: See documentation for
        write_station_metadata_to_processed_file.
    """

    error_checking.assert_file_exists(csv_file_name)
    return pandas.read_csv(
        csv_file_name, header=0, usecols=STATION_METADATA_COLUMNS,
        dtype=STATION_METADATA_COLUMN_TYPE_DICT)
Example #14
def unzip_1day_tar_file(tar_file_name, spc_date_string, top_target_dir_name,
                        scales_to_extract_metres2):
    """Unzips tar file with segmotion output for one SPC date.

    :param tar_file_name: Path to input file.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_target_dir_name: Name of top-level output directory.
    :param scales_to_extract_metres2: 1-D numpy array of tracking scales to
        extract.
    :return: target_directory_name: Path to output directory.  This will be
        "<top_target_dir_name>/<yyyy>/<yyyymmdd>", where <yyyy> is the year and
        <yyyymmdd> is the SPC date.
    """

    # Verification.
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_file_exists(tar_file_name)
    error_checking.assert_is_greater_numpy_array(scales_to_extract_metres2, 0)
    error_checking.assert_is_numpy_array(scales_to_extract_metres2,
                                         num_dimensions=1)

    scales_to_extract_metres2 = numpy.round(scales_to_extract_metres2).astype(
        int)

    num_scales_to_extract = len(scales_to_extract_metres2)
    directory_names_to_unzip = []

    for j in range(num_scales_to_extract):
        this_relative_stats_dir_name = '{0:s}/{1:s}'.format(
            spc_date_string,
            _get_relative_stats_dir_physical_scale(
                scales_to_extract_metres2[j]))

        this_relative_polygon_dir_name = '{0:s}/{1:s}'.format(
            spc_date_string,
            _get_relative_polygon_dir_physical_scale(
                scales_to_extract_metres2[j]))

        directory_names_to_unzip.append(
            this_relative_stats_dir_name.replace(spc_date_string + '/', ''))
        directory_names_to_unzip.append(
            this_relative_polygon_dir_name.replace(spc_date_string + '/', ''))

    target_directory_name = '{0:s}/{1:s}/{2:s}'.format(top_target_dir_name,
                                                       spc_date_string[:4],
                                                       spc_date_string)

    unzipping.unzip_tar(tar_file_name,
                        target_directory_name=target_directory_name,
                        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name
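A usage sketch for unzip_1day_tar_file follows; the module name, file names, and tracking scale are placeholders.

# Hypothetical usage; module name, paths, and tracking scale are placeholders.
import numpy
import segmotion_io  # assumed home of unzip_1day_tar_file

target_directory_name = segmotion_io.unzip_1day_tar_file(
    tar_file_name='segmotion_20110424.tar',
    spc_date_string='20110424',
    top_target_dir_name='segmotion_unzipped',
    scales_to_extract_metres2=numpy.array([3.14e8]))

# target_directory_name == 'segmotion_unzipped/2011/20110424'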
Example #15
def open_netcdf(netcdf_file_name, raise_error_if_fails=False):
    """Attempts to open NetCDF file.

    Code for handling gzip files comes from jochen at the following
    StackOverflow page: https://stackoverflow.com/posts/45356133/revisions

    :param netcdf_file_name: Path to input file.
    :param raise_error_if_fails: Boolean flag.  If raise_error_if_fails = True
        and file cannot be opened, this method will throw an error.
    :return: netcdf_dataset: Instance of `netCDF4.Dataset`, containing all data
        from the file.  If raise_error_if_fails = False and file could not be
        opened, this will be None.
    :raises: IOError: if file could not be opened and raise_error_if_fails =
        True.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    error_checking.assert_is_boolean(raise_error_if_fails)
    gzip_as_input = netcdf_file_name.endswith(GZIP_FILE_EXTENSION)

    if gzip_as_input:
        gzip_file_object = gzip.open(netcdf_file_name, 'rb')
        netcdf_temporary_file_object = tempfile.NamedTemporaryFile(delete=False)
        netcdf_file_name = netcdf_temporary_file_object.name

        success = False
        try:
            shutil.copyfileobj(gzip_file_object, netcdf_temporary_file_object)
            success = True
        except:
            if raise_error_if_fails:
                raise

        gzip_file_object.close()
        netcdf_temporary_file_object.close()
        if not success:
            os.remove(netcdf_file_name)
            return None

    try:
        netcdf_dataset = Dataset(netcdf_file_name)
    except IOError:
        if raise_error_if_fails:
            if gzip_as_input:
                os.remove(netcdf_file_name)
            raise

        netcdf_dataset = None

    if gzip_as_input:
        os.remove(netcdf_file_name)
    return netcdf_dataset
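open_netcdf handles gzipped input by decompressing to a temporary file and opening that copy. With a reasonably recent netCDF4 library, the same thing can be done without touching the disk via the `memory` keyword; the sketch below is an alternative shown for comparison, not the original implementation.

import gzip

from netCDF4 import Dataset


def open_gzipped_netcdf_in_memory(netcdf_file_name):
    """Opens gzipped NetCDF file from an in-memory buffer (sketch)."""

    with gzip.open(netcdf_file_name, 'rb') as gzip_file_object:
        return Dataset(
            'dummy_file_name.nc', mode='r', memory=gzip_file_object.read())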
Example #16
def read_data_from_full_grid_file(netcdf_file_name, metadata_dict,
                                  raise_error_if_fails=True):
    """Reads full radar grid from raw (either MYRORSS or MRMS) file.

    This file should contain one radar field at one height and one time step.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)

    :param netcdf_file_name: Path to input file.
    :param metadata_dict: Dictionary with metadata for NetCDF file, created by
        read_metadata_from_raw_file.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.  If False and file cannot be
        opened, this method will return None for all output variables.
    :return: field_matrix: M-by-N numpy array with values of radar field.
    :return: unique_grid_point_lat_deg: length-M numpy array of grid-point
        latitudes (deg N).  If array is increasing (decreasing), latitude
        increases (decreases) while traveling down the columns of field_matrix.
    :return: unique_grid_point_lng_deg: length-N numpy array of grid-point
        longitudes (deg E).  If array is increasing (decreasing), longitude
        increases (decreases) while traveling right across the rows of
        field_matrix.
    """

    error_checking.assert_file_exists(netcdf_file_name)

    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None, None, None

    # [:] reads the data into memory before the dataset is closed below.
    field_matrix = netcdf_dataset.variables[
        metadata_dict[FIELD_NAME_COLUMN_ORIG]][:]
    netcdf_dataset.close()

    min_latitude_deg = metadata_dict[NW_GRID_POINT_LAT_COLUMN] - (
        metadata_dict[LAT_SPACING_COLUMN] * (metadata_dict[NUM_LAT_COLUMN] - 1))
    unique_grid_point_lat_deg, unique_grid_point_lng_deg = (
        grids.get_latlng_grid_points(
            min_latitude_deg=min_latitude_deg,
            min_longitude_deg=metadata_dict[NW_GRID_POINT_LNG_COLUMN],
            lat_spacing_deg=metadata_dict[LAT_SPACING_COLUMN],
            lng_spacing_deg=metadata_dict[LNG_SPACING_COLUMN],
            num_rows=metadata_dict[NUM_LAT_COLUMN],
            num_columns=metadata_dict[NUM_LNG_COLUMN]))

    field_matrix = _remove_sentinels_from_full_grid(
        field_matrix, metadata_dict[SENTINEL_VALUE_COLUMN])
    return (numpy.flipud(field_matrix), unique_grid_point_lat_deg[::-1],
            unique_grid_point_lng_deg)
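The grid reconstruction above follows directly from the metadata: the northwest grid point has the maximum latitude and minimum longitude, so the unique coordinates are evenly spaced arrays anchored at that corner. A standalone sketch of the equivalent computation (not the library's grids.get_latlng_grid_points) is shown below.

import numpy


def latlng_grid_points_sketch(nw_grid_point_lat_deg, nw_grid_point_lng_deg,
                              lat_spacing_deg, lng_spacing_deg, num_rows,
                              num_columns):
    """Returns increasing arrays of unique grid-point latitudes/longitudes."""

    min_latitude_deg = nw_grid_point_lat_deg - lat_spacing_deg * (num_rows - 1)
    unique_latitudes_deg = (
        min_latitude_deg + lat_spacing_deg * numpy.arange(num_rows))
    unique_longitudes_deg = (
        nw_grid_point_lng_deg + lng_spacing_deg * numpy.arange(num_columns))

    return unique_latitudes_deg, unique_longitudes_deg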
Example #17
def concatenate_images(input_file_names,
                       output_file_name,
                       num_panel_rows,
                       num_panel_columns,
                       border_width_pixels=50,
                       montage_exe_name=DEFAULT_MONTAGE_EXE_NAME):
    """Concatenates many images into one paneled image.

    :param input_file_names: 1-D list of paths to input files (may be in any
        format handled by ImageMagick).
    :param output_file_name: Path to output file.
    :param num_panel_rows: Number of rows in paneled image.
    :param num_panel_columns: Number of columns in paneled image.
    :param border_width_pixels: Border width (whitespace) around each panel.
    :param montage_exe_name: Path to executable file for ImageMagick's "montage"
        function.  If you installed ImageMagick with root access, this should be
        the default.  Regardless, the pathless file name should be just
        "montage".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_is_numpy_array(numpy.array(input_file_names),
                                         num_dimensions=1)
    for this_file_name in input_file_names:
        error_checking.assert_file_exists(this_file_name)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(num_panel_rows)
    error_checking.assert_is_integer(num_panel_columns)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(montage_exe_name)

    num_panels = num_panel_rows * num_panel_columns
    error_checking.assert_is_geq(num_panels, len(input_file_names))

    command_string = '"{0:s}" -mode concatenate -tile {1:d}x{2:d}'.format(
        montage_exe_name, num_panel_columns, num_panel_rows)

    for this_file_name in input_file_names:
        command_string += ' "{0:s}"'.format(this_file_name)

    command_string += ' -trim -bordercolor White -border {0:d} "{1:s}"'.format(
        border_width_pixels, output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return
    raise ValueError(ERROR_STRING)
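A usage sketch for concatenate_images follows; the module name and file names are placeholders. Note that num_panel_rows * num_panel_columns must be at least the number of input files.

# Hypothetical usage; module name and file names are placeholders.
import imagemagick_utils  # assumed home of concatenate_images

panel_file_names = [
    'panel_a.png', 'panel_b.png', 'panel_c.png', 'panel_d.png'
]

# Four panels arranged in a 2-by-2 grid.
imagemagick_utils.concatenate_images(
    input_file_names=panel_file_names,
    output_file_name='paneled_figure.png',
    num_panel_rows=2, num_panel_columns=2)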
Example #18
def create_gif(input_file_names,
               output_file_name,
               num_seconds_per_frame,
               resize_factor=0.5,
               convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Creates GIF from static images.

    :param input_file_names: 1-D list of paths to input files (static images).
    :param output_file_name: Path to output file (GIF).
    :param num_seconds_per_frame: Number of seconds per frame.
    :param resize_factor: Resize factor.  When creating GIF, each static image
        (frame) will be resized to q times its original size, where q =
        `resize_factor`.  This will affect only the GIF.  The images themselves,
        at locations specified in `input_file_names`, will not be changed.
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_is_string_list(input_file_names)
    error_checking.assert_is_numpy_array(numpy.array(input_file_names),
                                         num_dimensions=1)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_file_exists(convert_exe_name)

    error_checking.assert_is_greater(num_seconds_per_frame, 0.)
    error_checking.assert_is_leq(num_seconds_per_frame, 10.)
    error_checking.assert_is_geq(resize_factor, 0.2)
    error_checking.assert_is_leq(resize_factor, 1.)

    num_centiseconds_per_frame = int(numpy.round(100 * num_seconds_per_frame))
    num_centiseconds_per_frame = max([num_centiseconds_per_frame, 1])
    resize_percentage = int(numpy.round(100 * resize_factor))
    resize_percentage = max([resize_percentage, 1])

    command_string = '"{0:s}" -delay {1:d} '.format(
        convert_exe_name, num_centiseconds_per_frame)

    command_string += ' '.join(['"{0:s}"'.format(f) for f in input_file_names])

    command_string += ' -resize {0:d}% "{1:s}"'.format(resize_percentage,
                                                       output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return

    raise ValueError(ERROR_STRING)
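A usage sketch for create_gif follows; the module name and file names are placeholders. The delay passed to ImageMagick is in centiseconds, so 0.5 seconds per frame becomes "-delay 50", and resize_factor=0.5 halves the linear dimensions of each frame in the GIF only.

# Hypothetical usage; module name and file names are placeholders.
import imagemagick_utils  # assumed home of create_gif

frame_file_names = ['frame0000.png', 'frame0001.png', 'frame0002.png']

imagemagick_utils.create_gif(
    input_file_names=frame_file_names,
    output_file_name='animation.gif',
    num_seconds_per_frame=0.5, resize_factor=0.5)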
Example #19
def read_data_from_full_grid_file(netcdf_file_name,
                                  metadata_dict,
                                  raise_error_if_fails=True):
    """Reads full radar grid from raw (either MYRORSS or MRMS) file.

    This file should contain one radar field at one height and one valid time.

    :param netcdf_file_name: Path to input file.
    :param metadata_dict: Dictionary created by `read_metadata_from_raw_file`.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be read,
        this method will raise an error.  If False and file cannot be read, will
        return None for all output vars.
    :return: field_matrix: M-by-N numpy array with radar field.  Latitude
        increases while moving up each column, and longitude increases while
        moving right along each row.
    :return: grid_point_latitudes_deg: length-M numpy array of grid-point
        latitudes (deg N).  This array is monotonically decreasing.
    :return: grid_point_longitudes_deg: length-N numpy array of grid-point
        longitudes (deg E).  This array is monotonically increasing.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None, None, None

    # [:] reads the data into memory before the dataset is closed below.
    field_matrix = netcdf_dataset.variables[
        metadata_dict[FIELD_NAME_COLUMN_ORIG]][:]
    netcdf_dataset.close()

    min_latitude_deg = metadata_dict[radar_utils.NW_GRID_POINT_LAT_COLUMN] - (
        metadata_dict[radar_utils.LAT_SPACING_COLUMN] *
        (metadata_dict[radar_utils.NUM_LAT_COLUMN] - 1))
    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        grids.get_latlng_grid_points(
            min_latitude_deg=min_latitude_deg,
            min_longitude_deg=metadata_dict[
                radar_utils.NW_GRID_POINT_LNG_COLUMN],
            lat_spacing_deg=metadata_dict[radar_utils.LAT_SPACING_COLUMN],
            lng_spacing_deg=metadata_dict[radar_utils.LNG_SPACING_COLUMN],
            num_rows=metadata_dict[radar_utils.NUM_LAT_COLUMN],
            num_columns=metadata_dict[radar_utils.NUM_LNG_COLUMN]))

    field_matrix = _remove_sentinels_from_full_grid(
        field_matrix, metadata_dict[radar_utils.SENTINEL_VALUE_COLUMN])
    return (numpy.flipud(field_matrix), grid_point_latitudes_deg[::-1],
            grid_point_longitudes_deg)
Example #20
def read_file(dill_file_name):
    """Reads set of isotonic-regression models from Dill file.

    :param dill_file_name: Path to input file.
    :return: scalar_model_objects: See doc for `train_models`.
    :return: vector_model_object_matrix: Same.
    """

    error_checking.assert_file_exists(dill_file_name)

    dill_file_handle = open(dill_file_name, 'rb')
    scalar_model_objects = dill.load(dill_file_handle)
    vector_model_object_matrix = dill.load(dill_file_handle)
    dill_file_handle.close()

    return scalar_model_objects, vector_model_object_matrix
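read_file expects two objects dumped sequentially to the same Dill handle, which implies a writer along the lines of the sketch below. This is an assumption based on the reader above, not the library's own write function.

import dill


def write_file_sketch(scalar_model_objects, vector_model_object_matrix,
                      dill_file_name):
    """Writes two isotonic-regression objects to one Dill file (sketch)."""

    with open(dill_file_name, 'wb') as dill_file_handle:
        dill.dump(scalar_model_objects, dill_file_handle)
        dill.dump(vector_model_object_matrix, dill_file_handle)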
Example #21
def read_station_metadata_from_raw_file(csv_file_name):
    """Reads metadata for Oklahoma Mesonet stations from raw file.

    This file is provided by the Oklahoma Mesonet and can be found here:
    www.mesonet.org/index.php/api/siteinfo/from_all_active_with_geo_fields/
    format/csv/

    :param csv_file_name: Path to input file.
    :return: station_metadata_table: pandas DataFrame with the following
        columns.
    station_metadata_table.station_id: String ID for station.
    station_metadata_table.station_name: Verbose name for station.
    station_metadata_table.latitude_deg: Latitude (deg N).
    station_metadata_table.longitude_deg: Longitude (deg E).
    station_metadata_table.elevation_m_asl: Elevation (metres above sea level).
    """

    error_checking.assert_file_exists(csv_file_name)

    station_metadata_table = pandas.read_csv(
        csv_file_name,
        header=0,
        sep=',',
        dtype={ELEVATION_COLUMN_ORIG: numpy.float64})
    station_metadata_table = station_metadata_table[ORIG_METADATA_COLUMN_NAMES]

    column_dict_old_to_new = {
        STATION_ID_COLUMN_IN_METADATA: raw_wind_io.STATION_ID_COLUMN,
        STATION_NAME_COLUMN_ORIG: raw_wind_io.STATION_NAME_COLUMN,
        LATITUDE_COLUMN_ORIG: raw_wind_io.LATITUDE_COLUMN,
        LONGITUDE_COLUMN_ORIG: raw_wind_io.LONGITUDE_COLUMN,
        ELEVATION_COLUMN_ORIG: raw_wind_io.ELEVATION_COLUMN
    }

    station_metadata_table.rename(columns=column_dict_old_to_new, inplace=True)
    station_metadata_table = _remove_invalid_metadata_rows(
        station_metadata_table)

    num_stations = len(station_metadata_table.index)
    for i in range(num_stations):
        station_metadata_table[raw_wind_io.STATION_ID_COLUMN].values[i] = (
            raw_wind_io.append_source_to_station_id(
                station_metadata_table[
                    raw_wind_io.STATION_ID_COLUMN].values[i],
                primary_source=raw_wind_io.OK_MESONET_DATA_SOURCE))

    return station_metadata_table
Example #22
def read_points(netcdf_file_name):
    """Reads human points of interest for one image from NetCDF file.

    :param netcdf_file_name: Path to input file.
    :return: point_dict: Dictionary with the following keys.
    point_dict['full_storm_id_string']: See input doc for `write_points`.
    point_dict['storm_time_unix_sec']: Same.
    point_dict['grid_row_by_point']: Same.
    point_dict['grid_column_by_point']: Same.
    point_dict['panel_row_by_point']: Same.
    point_dict['panel_column_by_point']: Same.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    dataset_object = netCDF4.Dataset(netcdf_file_name)

    point_dict = {
        STORM_ID_KEY:
        str(getattr(dataset_object, STORM_ID_KEY)),
        STORM_TIME_KEY:
        int(numpy.round(getattr(dataset_object, STORM_TIME_KEY))),
        GRID_ROW_BY_POINT_KEY:
        numpy.array(dataset_object.variables[GRID_ROW_BY_POINT_KEY][:],
                    dtype=float),
        GRID_COLUMN_BY_POINT_KEY:
        numpy.array(dataset_object.variables[GRID_COLUMN_BY_POINT_KEY][:],
                    dtype=float),
        PANEL_ROW_BY_POINT_KEY:
        numpy.array(dataset_object.variables[PANEL_ROW_BY_POINT_KEY][:],
                    dtype=int),
        PANEL_COLUMN_BY_POINT_KEY:
        numpy.array(dataset_object.variables[PANEL_COLUMN_BY_POINT_KEY][:],
                    dtype=int)
    }

    dataset_object.close()

    if point_dict[STORM_ID_KEY] == DUMMY_STORM_ID_STRING:
        point_dict[STORM_ID_KEY] = None
        point_dict[STORM_TIME_KEY] = None

    return point_dict
Example #23
def read_keras_model(hdf5_file_name, assumed_class_frequencies):
    """Reads Keras model from HDF5 file.

    :param hdf5_file_name: Path to input file.
    :param assumed_class_frequencies: See documentation for
        `get_unet_with_2d_convolution`.
    :return: keras_model_object: Instance of `keras.models.Model`.
    """

    error_checking.assert_file_exists(hdf5_file_name)

    class_weight_dict = ml_utils.get_class_weight_dict(
        assumed_class_frequencies)
    # list() ensures the dict values form a proper 1-D numpy array
    # (dict.values() returns a view object in Python 3).
    class_weights = numpy.array(list(class_weight_dict.values()))
    class_weights = numpy.reshape(class_weights, (class_weights.size, 1))

    CUSTOM_OBJECT_DICT_FOR_READING_MODEL.update(
        {'loss': keras_losses.weighted_cross_entropy(class_weights)})
    return keras.models.load_model(
        hdf5_file_name, custom_objects=CUSTOM_OBJECT_DICT_FOR_READING_MODEL)
Example #24
def capture_mouse_clicks(image_file_name, instruction_string=''):
    """This interactive method captures coordinates of human mouse clicks.

    N = number of mouse clicks

    :param image_file_name: Path to image file.  This method will display the
        image in a figure window and allow you to click on top.
    :param instruction_string: String with instructions for the user.
    :return: point_objects_pixel_coords: length-N list of points (instances
        of `shapely.geometry.Point`), each containing a click location in pixel
        coordinates.
    :return: num_pixel_rows: Number of pixel rows in the image.
    :return: num_pixel_columns: Number of pixel columns in the image.
    """

    error_checking.assert_file_exists(image_file_name)
    error_checking.assert_is_string(instruction_string)

    image_matrix = Image.open(image_file_name)
    num_pixel_columns, num_pixel_rows = image_matrix.size

    global figure_object
    figure_object = pyplot.subplots(1,
                                    1,
                                    figsize=(FIGURE_WIDTH_INCHES,
                                             FIGURE_HEIGHT_INCHES))[0]

    pyplot.imshow(image_matrix)
    pyplot.title(instruction_string)

    connection_id = figure_object.canvas.mpl_connect('button_press_event',
                                                     _click_handler)
    pyplot.show()
    figure_object.canvas.mpl_disconnect(connection_id)

    point_objects_pixel_coords = [
        shapely.geometry.Point(this_x, this_y)
        for this_x, this_y in zip(x_coords_px, y_coords_px)
    ]

    return point_objects_pixel_coords, num_pixel_rows, num_pixel_columns
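capture_mouse_clicks relies on a module-level _click_handler and the globals x_coords_px and y_coords_px, none of which appear in this example. A plausible sketch of that handler is given below; it is an assumption, not the module's actual code.

# Assumed sketch of the click handler referenced above (not the original code).
x_coords_px = []
y_coords_px = []


def _click_handler(event_object):
    """Appends coordinates of each mouse click to the module-level lists.

    With `pyplot.imshow`, event_object.xdata and event_object.ydata are in
    image (pixel) coordinates.
    """

    if event_object.xdata is None or event_object.ydata is None:
        return  # Click fell outside the image axes.

    x_coords_px.append(event_object.xdata)
    y_coords_px.append(event_object.ydata)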
Example #25
def read_field_from_grib_file(grib_file_name,
                              field_name_grib1,
                              num_grid_rows,
                              num_grid_columns,
                              sentinel_value=None,
                              temporary_dir_name=None,
                              wgrib_exe_name=WGRIB_EXE_NAME_DEFAULT,
                              wgrib2_exe_name=WGRIB2_EXE_NAME_DEFAULT,
                              raise_error_if_fails=True):
    """Reads field from grib file.

    One field = one variable at one time step.

    M = number of rows (unique y-coordinates or latitudes of grid points)
    N = number of columns (unique x-coordinates or longitudes of grid points)

    :param grib_file_name: Path to input file.
    :param field_name_grib1: Field name in grib1 format (example: 500-mb height
        is "HGT:500 mb").
    :param num_grid_rows: Number of rows expected in grid.
    :param num_grid_columns: Number of columns expected in grid.
    :param sentinel_value: Sentinel value (all instances will be replaced with
        NaN).
    :param temporary_dir_name: Name of temporary directory.  An intermediate
        text file will be stored here.
    :param wgrib_exe_name: Path to wgrib executable.
    :param wgrib2_exe_name: Path to wgrib2 executable.
    :param raise_error_if_fails: Boolean flag.  If the extraction fails and
        raise_error_if_fails = True, this method will error out.  If the
        extraction fails and raise_error_if_fails = False, this method will
        return None.
    :return: field_matrix: M-by-N numpy array with values of the given field.
        If the grid is regular in x-y coordinates, x increases towards the right
        (in the positive direction of the second axis), while y increases
        downward (in the positive direction of the first axis).  If the grid is
        regular in lat-long, replace "x" and "y" in the previous sentence with
        "long" and "lat," respectively.
    :raises: ValueError: if extraction fails and raise_error_if_fails = True.
    """

    # Error-checking.
    error_checking.assert_is_string(field_name_grib1)
    error_checking.assert_is_integer(num_grid_rows)
    error_checking.assert_is_greater(num_grid_rows, 0)
    error_checking.assert_is_integer(num_grid_columns)
    error_checking.assert_is_greater(num_grid_columns, 0)
    error_checking.assert_file_exists(wgrib_exe_name)
    error_checking.assert_file_exists(wgrib2_exe_name)
    error_checking.assert_is_boolean(raise_error_if_fails)
    if sentinel_value is not None:
        error_checking.assert_is_not_nan(sentinel_value)

    # Housekeeping.
    grib_file_type = file_name_to_type(grib_file_name)

    if temporary_dir_name is not None:
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=temporary_dir_name)
    temporary_file_name = tempfile.NamedTemporaryFile(dir=temporary_dir_name,
                                                      delete=False).name

    # Extract field to temporary file.
    if grib_file_type == GRIB1_FILE_TYPE:
        command_string = (
            '"{0:s}" "{1:s}" -s | grep -w "{2:s}" | "{0:s}" -i "{1:s}" '
            '-text -nh -o "{3:s}"').format(wgrib_exe_name, grib_file_name,
                                           field_name_grib1,
                                           temporary_file_name)
    else:
        command_string = (
            '"{0:s}" "{1:s}" -s | grep -w "{2:s}" | "{0:s}" -i "{1:s}" '
            '-no_header -text "{3:s}"').format(
                wgrib2_exe_name, grib_file_name,
                _field_name_grib1_to_grib2(field_name_grib1),
                temporary_file_name)

    try:
        subprocess.call(command_string, shell=True)
    except OSError as this_exception:
        os.remove(temporary_file_name)
        if raise_error_if_fails:
            raise

        warning_string = (
            '\n\n{0:s}\n\nCommand (shown above) failed (details shown below).'
            '\n\n{1:s}').format(command_string, str(this_exception))

        warnings.warn(warning_string)
        return None

    # Read field from temporary file.
    field_vector = numpy.loadtxt(temporary_file_name)
    os.remove(temporary_file_name)

    try:
        field_matrix = numpy.reshape(field_vector,
                                     (num_grid_rows, num_grid_columns))
    except ValueError as this_exception:
        if raise_error_if_fails:
            raise

        warning_string = (
            '\n\nnumpy.reshape failed (details shown below).\n\n{0:s}').format(
                str(this_exception))

        warnings.warn(warning_string)
        return None

    return _sentinel_value_to_nan(data_matrix=field_matrix,
                                  sentinel_value=sentinel_value)
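A usage sketch for read_field_from_grib_file follows; the module name, file name, grid dimensions, and sentinel value are placeholders (the dimensions must match the model grid stored in the grib file).

# Hypothetical usage; module name, file name, and grid specs are placeholders.
import grib_io  # assumed home of read_field_from_grib_file

height_matrix_m_asl = grib_io.read_field_from_grib_file(
    'example_model_run.grb2',
    field_name_grib1='HGT:500 mb',
    num_grid_rows=337, num_grid_columns=451,
    sentinel_value=9.999e20)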
def read_tornado_reports(csv_file_name):
    """Reads tornado reports from file.

    This file should contain all storm reports for one year.

    :param csv_file_name: Path to input file.
    :return: tornado_table: pandas DataFrame with the following columns.
    tornado_table.start_time_unix_sec: Start time.
    tornado_table.end_time_unix_sec: End time.
    tornado_table.start_latitude_deg: Latitude (deg N) of start point.
    tornado_table.start_longitude_deg: Longitude (deg E) of start point.
    tornado_table.end_latitude_deg: Latitude (deg N) of end point.
    tornado_table.end_longitude_deg: Longitude (deg E) of end point.
    tornado_table.fujita_rating: F-scale or EF-scale rating (integer from
        0...5).
    tornado_table.width_metres: Tornado width (metres).
    """

    error_checking.assert_file_exists(csv_file_name)
    storm_event_table = pandas.read_csv(csv_file_name, header=0, sep=',')

    tornado_flags = numpy.array([
        _is_event_tornado(s)
        for s in storm_event_table[EVENT_TYPE_COLUMN_ORIG].values
    ])

    bad_rows = numpy.where(numpy.invert(tornado_flags))[0]
    storm_event_table.drop(storm_event_table.index[bad_rows],
                           axis=0,
                           inplace=True)

    num_reports = len(storm_event_table.index)
    start_times_unix_sec = numpy.full(num_reports, -1, dtype=int)
    end_times_unix_sec = numpy.full(num_reports, -1, dtype=int)

    for i in range(num_reports):
        this_utc_offset_hours = _time_zone_string_to_utc_offset(
            storm_event_table[TIME_ZONE_COLUMN_ORIG].values[i])
        if numpy.isnan(this_utc_offset_hours):
            continue

        start_times_unix_sec[i] = _local_time_string_to_unix_sec(
            storm_event_table[START_TIME_COLUMN_ORIG].values[i],
            this_utc_offset_hours)
        end_times_unix_sec[i] = _local_time_string_to_unix_sec(
            storm_event_table[END_TIME_COLUMN_ORIG].values[i],
            this_utc_offset_hours)

    tornado_dict = {
        tornado_io.START_TIME_COLUMN:
        start_times_unix_sec,
        tornado_io.END_TIME_COLUMN:
        end_times_unix_sec,
        tornado_io.START_LAT_COLUMN:
        storm_event_table[START_LATITUDE_COLUMN_ORIG].values,
        tornado_io.START_LNG_COLUMN:
        storm_event_table[START_LONGITUDE_COLUMN_ORIG].values,
        tornado_io.END_LAT_COLUMN:
        storm_event_table[END_LATITUDE_COLUMN_ORIG].values,
        tornado_io.END_LNG_COLUMN:
        storm_event_table[END_LONGITUDE_COLUMN_ORIG].values,
        tornado_io.FUJITA_RATING_COLUMN:
        storm_event_table[TORNADO_RATING_COLUMN_ORIG].values,
        tornado_io.WIDTH_COLUMN:
        FEET_TO_METRES * storm_event_table[TORNADO_WIDTH_COLUMN_ORIG].values
    }

    tornado_table = pandas.DataFrame.from_dict(tornado_dict)
    return tornado_io.remove_invalid_reports(tornado_table)
def read_thunderstorm_wind_reports(csv_file_name):
    """Reads thunderstorm-wind reports from file.

    This file should contain all storm reports for one year.

    :param csv_file_name: Path to input file.
    :return: wind_table: pandas DataFrame with the following columns.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.unix_time_sec: Valid time.
    wind_table.u_wind_m_s01: u-component of wind (metres per second).
    wind_table.v_wind_m_s01: v-component of wind (metres per second).
    """

    error_checking.assert_file_exists(csv_file_name)
    storm_event_table = pandas.read_csv(csv_file_name, header=0, sep=',')

    thunderstorm_wind_flags = numpy.array([
        _is_event_thunderstorm_wind(s)
        for s in storm_event_table[EVENT_TYPE_COLUMN_ORIG].values
    ])

    bad_rows = numpy.where(numpy.invert(thunderstorm_wind_flags))[0]
    storm_event_table.drop(storm_event_table.index[bad_rows],
                           axis=0,
                           inplace=True)

    num_reports = len(storm_event_table.index)
    unix_times_sec = numpy.full(num_reports, -1, dtype=int)
    for i in range(num_reports):
        this_utc_offset_hours = _time_zone_string_to_utc_offset(
            storm_event_table[TIME_ZONE_COLUMN_ORIG].values[i])
        if numpy.isnan(this_utc_offset_hours):
            continue

        unix_times_sec[i] = _local_time_string_to_unix_sec(
            storm_event_table[START_TIME_COLUMN_ORIG].values[i],
            this_utc_offset_hours)

    wind_speeds_m_s01 = KT_TO_METRES_PER_SECOND * storm_event_table[
        WIND_SPEED_COLUMN_ORIG].values
    wind_directions_deg = numpy.full(num_reports,
                                     raw_wind_io.WIND_DIR_DEFAULT_DEG)
    u_winds_m_s01, v_winds_m_s01 = raw_wind_io.speed_and_direction_to_uv(
        wind_speeds_m_s01, wind_directions_deg)

    station_ids = _create_fake_station_ids_for_wind(num_reports)

    wind_dict = {
        raw_wind_io.TIME_COLUMN:
        unix_times_sec,
        raw_wind_io.LATITUDE_COLUMN:
        storm_event_table[START_LATITUDE_COLUMN_ORIG].values,
        raw_wind_io.LONGITUDE_COLUMN:
        storm_event_table[START_LONGITUDE_COLUMN_ORIG].values,
        raw_wind_io.STATION_ID_COLUMN:
        station_ids,
        raw_wind_io.STATION_NAME_COLUMN:
        station_ids,
        raw_wind_io.ELEVATION_COLUMN:
        numpy.full(num_reports, numpy.nan),
        raw_wind_io.U_WIND_COLUMN:
        u_winds_m_s01,
        raw_wind_io.V_WIND_COLUMN:
        v_winds_m_s01
    }

    wind_table = pandas.DataFrame.from_dict(wind_dict)
    return _remove_invalid_wind_reports(wind_table)
def read_highs_and_lows(text_file_name):
    """Reads locations of high- and low-pressure centers.

    :param text_file_name: Path to input file (text file in WPC format).
    :return: high_low_table: pandas DataFrame with the following columns.  Each
        row is one high- or low-pressure center.
    high_low_table.system_type_string: Type of system (either "high" or "low").
    high_low_table.unix_time_sec: Valid time.
    high_low_table.latitude_deg: Latitude (deg N).
    high_low_table.longitude_deg: Longitude (deg E).
    """

    error_checking.assert_file_exists(text_file_name)
    valid_time_unix_sec = _file_name_to_valid_time(text_file_name)

    system_type_strings = []
    latitudes_deg = numpy.array([], dtype=float)
    longitudes_deg = numpy.array([], dtype=float)

    for this_line in open(text_file_name, 'r').readlines():
        these_words = this_line.split()
        if len(these_words) == 0:
            continue

        this_system_type_string = these_words[0].lower()
        if this_system_type_string not in VALID_SYSTEM_TYPE_STRINGS:
            continue

        these_words = these_words[1:]
        these_latitudes_deg = []
        these_longitudes_deg = []

        for this_word in these_words:
            if len(this_word) < 5:
                continue

            this_latitude_deg, this_longitude_deg = _string_to_latlng(
                latlng_string=this_word, raise_error_if_fails=True)

            these_latitudes_deg.append(this_latitude_deg)
            these_longitudes_deg.append(this_longitude_deg)

        these_latitudes_deg = numpy.array(these_latitudes_deg)
        these_longitudes_deg = numpy.array(these_longitudes_deg)

        if numpy.any(numpy.isnan(these_latitudes_deg)):
            continue

        error_checking.assert_is_valid_lat_numpy_array(these_latitudes_deg)
        these_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
            these_longitudes_deg, allow_nan=False)

        system_type_strings += ([this_system_type_string] *
                                len(these_latitudes_deg))

        latitudes_deg = numpy.concatenate((latitudes_deg, these_latitudes_deg))
        longitudes_deg = numpy.concatenate(
            (longitudes_deg, these_longitudes_deg))

    valid_times_unix_sec = numpy.full(len(system_type_strings),
                                      valid_time_unix_sec,
                                      dtype=int)

    high_low_dict = {
        SYSTEM_TYPE_COLUMN: system_type_strings,
        front_utils.TIME_COLUMN: valid_times_unix_sec,
        LATITUDE_COLUMN: latitudes_deg,
        LONGITUDE_COLUMN: longitudes_deg
    }

    return pandas.DataFrame.from_dict(high_low_dict)
def read_fronts_from_file(text_file_name):
    """Reads locations of warm and cold fronts from WPC bulletin.

    Input file should contain positions of cyclones, anticyclones, fronts, etc.
    for a single valid time.

    :param text_file_name: Path to input file (text file in WPC format).
    :return: front_table: pandas DataFrame with the following columns.  Each row
        is one front.
    front_table.front_type: Type of front (examples: "warm", "cold").
    front_table.unix_time_sec: Valid time.
    front_table.latitudes_deg: 1-D numpy array of latitudes (deg N) along front.
    front_table.longitudes_deg: 1-D numpy array of longitudes (deg E) along
        front.
    """

    error_checking.assert_file_exists(text_file_name)
    valid_time_unix_sec = _file_name_to_valid_time(text_file_name)

    front_types = []
    latitudes_2d_list_deg = []
    longitudes_2d_list_deg = []

    for this_line in open(text_file_name, 'r').readlines():
        these_words = this_line.split()  # Need to skip empty lines.
        if not these_words:
            continue

        this_front_type = these_words[0].lower()
        if this_front_type not in VALID_FRONT_TYPES:
            continue

        these_words = these_words[1:]
        this_num_points = len(these_words)
        these_latitudes_deg = numpy.full(this_num_points, numpy.nan)
        these_longitudes_deg = numpy.full(this_num_points, numpy.nan)

        for i in range(this_num_points):
            these_latitudes_deg[i], these_longitudes_deg[i] = (
                _string_to_latlng(these_words[i], False))

        if numpy.any(numpy.isnan(these_latitudes_deg)):
            continue

        error_checking.assert_is_valid_lat_numpy_array(these_latitudes_deg)
        these_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
            these_longitudes_deg, allow_nan=False)

        front_types.append(this_front_type)
        latitudes_2d_list_deg.append(these_latitudes_deg)
        longitudes_2d_list_deg.append(these_longitudes_deg)

    num_fronts = len(front_types)
    valid_times_unix_sec = numpy.full(num_fronts,
                                      valid_time_unix_sec,
                                      dtype=int)

    front_dict = {
        front_utils.FRONT_TYPE_COLUMN: front_types,
        front_utils.TIME_COLUMN: valid_times_unix_sec,
        front_utils.LATITUDES_COLUMN: latitudes_2d_list_deg,
        front_utils.LONGITUDES_COLUMN: longitudes_2d_list_deg
    }
    return pandas.DataFrame.from_dict(front_dict)
def read_winds_from_raw_file(netcdf_file_name, secondary_source=None,
                             raise_error_if_fails=True):
    """Reads wind observations from raw file.

    This file should contain all fields for one secondary data source and one
    hour.

    :param netcdf_file_name: Path to input file.
    :param secondary_source: String ID for secondary data source.
    :param raise_error_if_fails: Boolean flag.  If True and the read fails, this
        method will raise an error.  If False and the read fails, this method
        will return None.
    :return: wind_table: If file cannot be opened and raise_error_if_fails =
        False, this is None.  Otherwise, it is a pandas DataFrame with the
        following columns.
    wind_table.station_id: String ID for station.
    wind_table.station_name: Verbose name for station.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.elevation_m_asl: Elevation (metres above sea level).
    wind_table.unix_time_sec: Observation time (seconds since 0000 UTC 1 Jan
        1970).
    wind_table.wind_speed_m_s01: Speed of sustained wind (m/s).
    wind_table.wind_direction_deg: Direction of sustained wind (degrees of
        origin -- i.e., direction that the wind is coming from -- as per
        meteorological convention).
    wind_table.wind_gust_speed_m_s01: Speed of wind gust (m/s).
    wind_table.wind_gust_direction_deg: Direction of wind gust (degrees of
        origin).
    """

    error_checking.assert_file_exists(netcdf_file_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name,
                                           raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    # TODO(thunderhoser): This is hacky (accounts for length-0 arrays of station
    # names).  Find a better way to handle this exception.
    try:
        station_names = _char_matrix_to_string_list(
            netcdf_dataset.variables[STATION_NAME_COLUMN_ORIG][:])
    except IndexError:
        return None

    try:
        station_ids = _char_matrix_to_string_list(
            netcdf_dataset.variables[STATION_ID_COLUMN_ORIG][:])
    except KeyError:
        station_ids = station_names

    for i in range(len(station_ids)):
        station_ids[i] = raw_wind_io.append_source_to_station_id(
            station_ids[i], primary_source=raw_wind_io.MADIS_DATA_SOURCE,
            secondary_source=secondary_source)

    try:
        unix_times_sec = netcdf_dataset.variables[TIME_COLUMN_ORIG][:]
    except KeyError:
        unix_times_sec = netcdf_dataset.variables[TIME_COLUMN_ORIG_BACKUP][:]

    wind_speeds_m_s01 = netcdf_dataset.variables[WIND_SPEED_COLUMN_ORIG][:]
    wind_speed_quality_flags = netcdf_dataset.variables[
        WIND_SPEED_FLAG_COLUMN_ORIG][:]
    num_observations = len(wind_speeds_m_s01)

    try:
        wind_directions_deg = netcdf_dataset.variables[WIND_DIR_COLUMN_ORIG][:]
        wind_dir_quality_flags = netcdf_dataset.variables[
            WIND_DIR_FLAG_COLUMN_ORIG][:]
    except KeyError:
        wind_directions_deg = numpy.full(num_observations, numpy.nan)
        wind_dir_quality_flags = [DEFAULT_QUALITY_FLAG] * num_observations

    try:
        wind_gust_speeds_m_s01 = netcdf_dataset.variables[
            WIND_GUST_SPEED_COLUMN_ORIG][:]
        wind_gust_speed_quality_flags = netcdf_dataset.variables[
            WIND_GUST_SPEED_FLAG_COLUMN_ORIG][:]
    except KeyError:
        wind_gust_speeds_m_s01 = numpy.full(num_observations, numpy.nan)
        wind_gust_speed_quality_flags = (
            [DEFAULT_QUALITY_FLAG] * num_observations)

    try:
        wind_gust_directions_deg = netcdf_dataset.variables[
            WIND_GUST_DIR_COLUMN_ORIG][:]
        wind_gust_dir_quality_flags = netcdf_dataset.variables[
            WIND_GUST_DIR_FLAG_COLUMN_ORIG][:]
    except KeyError:
        wind_gust_directions_deg = numpy.full(num_observations, numpy.nan)
        wind_gust_dir_quality_flags = [DEFAULT_QUALITY_FLAG] * num_observations

    wind_dict = {raw_wind_io.STATION_ID_COLUMN: station_ids,
                 raw_wind_io.STATION_NAME_COLUMN: station_names,
                 raw_wind_io.LATITUDE_COLUMN: netcdf_dataset.variables[
                     LATITUDE_COLUMN_ORIG][:],
                 raw_wind_io.LONGITUDE_COLUMN: netcdf_dataset.variables[
                     LONGITUDE_COLUMN_ORIG][:],
                 raw_wind_io.ELEVATION_COLUMN: netcdf_dataset.variables[
                     ELEVATION_COLUMN_ORIG][:],
                 raw_wind_io.TIME_COLUMN: numpy.array(unix_times_sec).astype(
                     int),
                 raw_wind_io.WIND_SPEED_COLUMN: wind_speeds_m_s01,
                 raw_wind_io.WIND_DIR_COLUMN: wind_directions_deg,
                 raw_wind_io.WIND_GUST_SPEED_COLUMN: wind_gust_speeds_m_s01,
                 raw_wind_io.WIND_GUST_DIR_COLUMN: wind_gust_directions_deg,
                 WIND_SPEED_FLAG_COLUMN: wind_speed_quality_flags,
                 WIND_DIR_FLAG_COLUMN: wind_dir_quality_flags,
                 WIND_GUST_SPEED_FLAG_COLUMN: wind_gust_speed_quality_flags,
                 WIND_GUST_DIR_FLAG_COLUMN: wind_gust_dir_quality_flags}

    netcdf_dataset.close()
    wind_table = pandas.DataFrame.from_dict(wind_dict)
    wind_table = _remove_invalid_wind_rows(wind_table)
    return _remove_low_quality_data(wind_table)
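The helper _char_matrix_to_string_list is referenced above but not shown: MADIS NetCDF files store station names and IDs as 2-D character arrays, one row per station. A plausible sketch, based on netCDF4's chartostring utility, is given below; it is an assumption, not the module's actual code.

import numpy
from netCDF4 import chartostring


def _char_matrix_to_string_list_sketch(char_matrix):
    """Converts 2-D character matrix (one row per string) to list of strings."""

    string_array = chartostring(numpy.array(char_matrix))
    return [str(this_string).strip() for this_string in string_array]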