def write_file(
        pickle_file_name, activation_matrix, storm_ids, storm_times_unix_sec,
        model_file_name, component_type_string, target_class=None,
        layer_name=None, neuron_index_matrix=None, channel_indices=None):
    """Writes activations to Pickle file.

    E = number of examples (storm objects)
    C = number of model components (classes, neurons, or channels) for which
        activations were computed

    :param pickle_file_name: Path to output file.
    :param activation_matrix: E-by-C numpy array of activations, where
        activation_matrix[i, j] = activation of the [j]th model component for
        the [i]th example.
    :param storm_ids: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model.
    :param component_type_string: See doc for `check_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_index_matrix: Same.
    :param channel_indices: Same.
    """

    num_components = check_metadata(
        component_type_string=component_type_string, target_class=target_class,
        layer_name=layer_name, neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)
    error_checking.assert_is_string(model_file_name)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), num_dimensions=1)
    num_examples = len(storm_ids)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=numpy.array([num_examples]))

    error_checking.assert_is_numpy_array_without_nan(activation_matrix)
    error_checking.assert_is_numpy_array(
        activation_matrix,
        exact_dimensions=numpy.array([num_examples, num_components]))

    metadata_dict = {
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        NEURON_INDICES_KEY: neuron_index_matrix,
        CHANNEL_INDICES_KEY: channel_indices,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(activation_matrix, pickle_file_handle)
    pickle.dump(metadata_dict, pickle_file_handle)
    pickle_file_handle.close()
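# Hypothetical usage sketch (not part of the original module): write activations
# of one target class for two storm objects.  File paths and the component-type
# string are placeholders; the latter must be a value accepted by
# `check_metadata`.
import numpy

example_activation_matrix = numpy.array([[0.7], [0.2]])

write_file(
    pickle_file_name='activations/class_activations.p',
    activation_matrix=example_activation_matrix,
    storm_ids=['storm_0001', 'storm_0002'],
    storm_times_unix_sec=numpy.array([1504000000, 1504000300], dtype=int),
    model_file_name='models/cnn_model.h5',
    component_type_string='class',
    target_class=1)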
def _plot_feature_map_after_conv(feature_matrix):
    """Plots new feature map (after convolution).

    M = number of rows in grid
    N = number of columns in grid

    :param feature_matrix: Feature map as M-by-N numpy array.
    """

    dummy_matrix = numpy.full(feature_matrix.shape, numpy.nan)
    dummy_matrix[:2, :2] = HIGHLIGHTED_VALUE

    _, axes_object = pyplot.subplots(1,
                                     1,
                                     figsize=(FIGURE_WIDTH_INCHES,
                                              FIGURE_HEIGHT_INCHES))
    pyplot.imshow(dummy_matrix,
                  cmap=COLOUR_MAP_OBJECT,
                  vmin=HIGHLIGHTED_VALUE - 1,
                  vmax=HIGHLIGHTED_VALUE,
                  axes=axes_object,
                  origin='upper')
    pyplot.xticks([], [])
    pyplot.yticks([], [])

    for i in range(feature_matrix.shape[1]):
        for j in range(feature_matrix.shape[0]):
            if i == j == 1:
                this_colour = SPECIAL_COLOUR + 0.
            else:
                this_colour = MAIN_COLOUR + 0.

            axes_object.text(i,
                             j,
                             '{0:.1f}'.format(feature_matrix[j, i]),
                             fontsize=OVERLAY_FONT_SIZE,
                             color=this_colour,
                             horizontalalignment='center',
                             verticalalignment='center')

    # polygon_x_coords = numpy.array([0, 2, 2, 0, 0], dtype=float) - 0.5
    # polygon_y_coords = numpy.array([2, 2, 0, 0, 2], dtype=float) - 0.5
    # axes_object.plot(
    #     polygon_x_coords, polygon_y_coords, color=LINE_COLOUR,
    #     linewidth=LINE_WIDTH)

    plotting_utils.annotate_axes(axes_object=axes_object,
                                 annotation_string='(c)',
                                 font_colour=ANNOTATION_COLOUR)

    print('Saving figure to: "{0:s}"...'.format(AFTER_CONV_FILE_NAME))
    file_system_utils.mkdir_recursive_if_necessary(
        file_name=AFTER_CONV_FILE_NAME)
    pyplot.savefig(AFTER_CONV_FILE_NAME, dpi=OUTPUT_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(input_file_name=AFTER_CONV_FILE_NAME,
                                      output_file_name=AFTER_CONV_FILE_NAME)

    return feature_matrix
def _write_intermediate_results(storm_object_table, temp_file_name):
    """Writes intermediate best-track results for a subset of storm objects.

    P = number of grid points in a given storm object

    :param storm_object_table: pandas DataFrame with the following columns.
        Each row is one storm object.
    storm_object_table.storm_id: String ID for storm cell.
    storm_object_table.original_storm_id: Original ID (before best-track).
    storm_object_table.unix_time_sec: Valid time.
    storm_object_table.spc_date_unix_sec: Valid SPC date.
    storm_object_table.grid_point_latitudes_deg: length-P numpy array with
        latitudes (deg N) of grid points in storm object.
    storm_object_table.grid_point_longitudes_deg: length-P numpy array with
        longitudes (deg E) of grid points in storm object.
    storm_object_table.grid_point_rows: length-P numpy array with row indices
        (integers) of grid points in storm object.
    storm_object_table.grid_point_columns: length-P numpy array with column
        indices (integers) of grid points in storm object.

    storm_object_table.centroid_x_metres: x-coordinate of storm centroid.
    storm_object_table.centroid_y_metres: y-coordinate of storm centroid.
    storm_object_table.file_index: Array index of file containing storm object.
        This is an index into the file-name array for the given SPC date.

    :param temp_file_name: Path to intermediate file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=temp_file_name)

    pickle_file_handle = open(temp_file_name, 'wb')
    pickle.dump(storm_object_table[INTERMEDIATE_COLUMNS], pickle_file_handle)
    pickle_file_handle.close()
def write_to_netcdf(latitudes_deg, longitudes_deg, netcdf_file_name):
    """Writes boundary to NetCDF file.

    :param latitudes_deg: See doc for `_check_boundary`.
    :param longitudes_deg: Same.
    :param netcdf_file_name: Path to output file.
    """

    longitudes_deg = _check_boundary(latitudes_deg=latitudes_deg,
                                     longitudes_deg=longitudes_deg)

    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(netcdf_file_name,
                                     'w',
                                     format='NETCDF3_64BIT_OFFSET')

    num_points = len(latitudes_deg)
    dataset_object.createDimension(NETCDF_VERTEX_DIMENSION_KEY, num_points)

    dataset_object.createVariable(NETCDF_LATITUDES_KEY,
                                  datatype=numpy.float32,
                                  dimensions=NETCDF_VERTEX_DIMENSION_KEY)
    dataset_object.variables[NETCDF_LATITUDES_KEY][:] = latitudes_deg

    dataset_object.createVariable(NETCDF_LONGITUDES_KEY,
                                  datatype=numpy.float32,
                                  dimensions=NETCDF_VERTEX_DIMENSION_KEY)
    dataset_object.variables[NETCDF_LONGITUDES_KEY][:] = longitudes_deg

    dataset_object.close()
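# Hypothetical usage sketch (not part of the original module): write a simple
# rectangular boundary.  Coordinates and file path are placeholders;
# `_check_boundary` defines the actual requirements on the two arrays.
import numpy

boundary_latitudes_deg = numpy.array([35., 35., 40., 40., 35.])
boundary_longitudes_deg = numpy.array([262., 268., 268., 262., 262.])

write_to_netcdf(latitudes_deg=boundary_latitudes_deg,
                longitudes_deg=boundary_longitudes_deg,
                netcdf_file_name='boundaries/example_boundary.nc')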
def _run(output_file_name):
    """Plots Laplacian kernel used for edge-detector test.

    This is effectively the main method.

    :param output_file_name: See documentation at top of file.
    """

    num_heights = KERNEL_MATRIX_3D.shape[-1]

    figure_object, axes_object_matrix = plotting_utils.create_paneled_figure(
        num_rows=1,
        num_columns=num_heights,
        horizontal_spacing=0.1,
        vertical_spacing=0.1,
        shared_x_axis=False,
        shared_y_axis=False,
        keep_aspect_ratio=True)

    for k in range(num_heights):
        _plot_kernel_one_height(kernel_matrix_2d=KERNEL_MATRIX_3D[..., k],
                                axes_object=axes_object_matrix[0, k])

    axes_object_matrix[0, 0].set_title('Bottom height')
    axes_object_matrix[0, 1].set_title('Middle height')
    axes_object_matrix[0, 2].set_title('Top height')

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)
def write_narr_grids_to_file(frontal_grid_table, pickle_file_name):
    """Writes one or more NARR* grids to file.

    * NARR = North American Regional Reanalysis

    :param frontal_grid_table: pandas DataFrame with the following columns.
        Each row is one valid time.
    frontal_grid_table.unix_time_sec: Valid time.
    frontal_grid_table.warm_front_row_indices: length-W numpy array with row
        indices (integers) of grid cells intersected by a warm front.
    frontal_grid_table.warm_front_column_indices: Same as above, except for
        columns.
    frontal_grid_table.cold_front_row_indices: length-C numpy array with row
        indices (integers) of grid cells intersected by a cold front.
    frontal_grid_table.cold_front_column_indices: Same as above, except for
        columns.

    :param pickle_file_name: Path to output file.
    """

    error_checking.assert_columns_in_dataframe(frontal_grid_table,
                                               REQUIRED_GRID_COLUMNS)

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(frontal_grid_table[REQUIRED_GRID_COLUMNS], pickle_file_handle)
    pickle_file_handle.close()
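# Hypothetical usage sketch (not part of the original module): one valid time
# with short warm and cold fronts.  Column names follow the docstring above and
# are assumed to match REQUIRED_GRID_COLUMNS; all values are placeholders.
import numpy
import pandas

frontal_grid_table = pandas.DataFrame({
    'unix_time_sec': [1512000000],
    'warm_front_row_indices': [numpy.array([100, 100, 101], dtype=int)],
    'warm_front_column_indices': [numpy.array([200, 201, 202], dtype=int)],
    'cold_front_row_indices': [numpy.array([150, 151], dtype=int)],
    'cold_front_column_indices': [numpy.array([180, 180], dtype=int)],
})

write_narr_grids_to_file(frontal_grid_table,
                         pickle_file_name='fronts/narr_grids_2017-11-30.p')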
def _plot_scores_as_grid(
        score_matrix, colour_map_object, min_colour_value, max_colour_value,
        x_tick_labels, x_axis_label, x_axis_text_colour, y_tick_labels,
        y_axis_label, y_axis_text_colour, title_string, output_file_name):
    """Plots model scores as 2-D grid.

    M = number of rows in grid
    N = number of columns in grid

    :param score_matrix: M-by-N numpy array of model scores.
    :param colour_map_object: Instance of `matplotlib.colors.ListedColormap`.
    :param min_colour_value: Minimum value in colour map.
    :param max_colour_value: Maximum value in colour map.
    :param x_tick_labels: length-N list of string labels.
    :param x_axis_label: String label for the entire x-axis.
    :param x_axis_text_colour: Colour for all text labels along x-axis.
    :param y_tick_labels: length-M list of string labels.
    :param y_axis_label: String label for the entire y-axis.
    :param y_axis_text_colour: Colour for all text labels along y-axis.
    :param title_string: Figure title.
    :param output_file_name: Path to output file (the figure will be saved
        here).
    """

    _, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    score_matrix_to_plot = score_matrix + 0.
    score_matrix_to_plot[numpy.isnan(score_matrix_to_plot)] = 0.
    pyplot.imshow(
        score_matrix_to_plot, cmap=colour_map_object, origin='lower',
        vmin=min_colour_value, vmax=max_colour_value)

    x_tick_values = numpy.linspace(
        0, score_matrix_to_plot.shape[1] - 1,
        num=score_matrix_to_plot.shape[1], dtype=float)
    pyplot.xticks(x_tick_values, x_tick_labels, color=x_axis_text_colour)
    pyplot.xlabel(x_axis_label, color=x_axis_text_colour)

    y_tick_values = numpy.linspace(
        0, score_matrix_to_plot.shape[0] - 1,
        num=score_matrix_to_plot.shape[0], dtype=float)
    pyplot.yticks(y_tick_values, y_tick_labels, color=y_axis_text_colour)
    pyplot.ylabel(y_axis_label, color=y_axis_text_colour)

    pyplot.title(title_string)
    plotting_utils.add_linear_colour_bar(
        axes_object_or_list=axes_object,
        values_to_colour=score_matrix_to_plot,
        colour_map=colour_map_object, colour_min=min_colour_value,
        colour_max=max_colour_value, orientation='vertical',
        extend_min=True, extend_max=True, font_size=FONT_SIZE)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    pyplot.savefig(output_file_name, dpi=FIGURE_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(
        input_file_name=output_file_name, output_file_name=output_file_name)
def write_processed_file(wind_table, csv_file_name=None, write_mode='w'):
    """Writes wind observations to file.

    This is considered a "processed file," as opposed to a "raw file".  A "raw
    file" is one taken directly from another database, in the native format of
    said database.  For examples, see `madis_io.read_winds_from_raw_file` and
    `ok_mesonet_io.read_winds_from_raw_file`.

    :param wind_table: pandas DataFrame with the following columns.
    wind_table.station_id: String ID for station.
    wind_table.station_name: Verbose name for station.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.elevation_m_asl: Elevation (metres above sea level).
    wind_table.unix_time_sec: Valid time in Unix format.
    wind_table.u_wind_m_s01: u-wind (metres per second).
    wind_table.v_wind_m_s01: v-wind (metres per second).

    :param csv_file_name: Path to output file.
    :param write_mode: Any string accepted by the built-in method `open`.
    """

    error_checking.assert_columns_in_dataframe(wind_table, WIND_COLUMNS)
    file_system_utils.mkdir_recursive_if_necessary(file_name=csv_file_name)

    write_header = not os.path.isfile(csv_file_name) or 'w' in write_mode
    wind_table.to_csv(csv_file_name,
                      header=write_header,
                      columns=WIND_COLUMNS,
                      index=False,
                      mode=write_mode)
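# Hypothetical usage sketch (not part of the original module): write two wind
# observations, then append the same two to the file.  Column names follow the
# docstring above and are assumed to match WIND_COLUMNS; values are placeholders.
import pandas

wind_table = pandas.DataFrame({
    'station_id': ['KOUN', 'KTLX'],
    'station_name': ['Norman OK', 'Twin Lakes OK'],
    'latitude_deg': [35.24, 35.33],
    'longitude_deg': [262.55, 262.46],
    'elevation_m_asl': [357., 370.],
    'unix_time_sec': [1504000000, 1504000000],
    'u_wind_m_s01': [3.2, -1.5],
    'v_wind_m_s01': [0.8, 4.1],
})

write_processed_file(wind_table, csv_file_name='winds/20170829.csv',
                     write_mode='w')
write_processed_file(wind_table, csv_file_name='winds/20170829.csv',
                     write_mode='a')  # header is not rewritten on append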
def resize_image(input_file_name,
                 output_file_name,
                 output_size_pixels,
                 convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Resizes image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param output_size_pixels: Output size, as a total number of pixels.
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(output_size_pixels)
    error_checking.assert_is_greater(output_size_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    command_string = '"{0:s}" "{1:s}" -resize {2:d}@ "{3:s}"'.format(
        convert_exe_name, input_file_name, output_size_pixels,
        output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return

    raise ValueError(ERROR_STRING)
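# Hypothetical usage sketch (not part of the original module).  The "@" suffix
# in the ImageMagick command above makes `output_size_pixels` a total pixel
# count (area), not a width or height; file paths here are placeholders.
resize_image(input_file_name='panels/roc_curve.jpg',
             output_file_name='panels/roc_curve_small.jpg',
             output_size_pixels=int(2.5e6))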
def write_ids_and_times(full_id_strings, storm_times_unix_sec,
                        pickle_file_name):
    """Writes full storm IDs and valid times (minimal metadata) to Pickle file.

    N = number of storm objects

    :param full_id_strings: length-N list of full IDs.
    :param storm_times_unix_sec: length-N numpy array of valid times.
    :param pickle_file_name: Path to output file.
    """

    error_checking.assert_is_string_list(full_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_id_strings), num_dimensions=1)
    num_storm_objects = len(full_id_strings)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec,
        exact_dimensions=numpy.array([num_storm_objects], dtype=int)
    )

    metadata_dict = {
        FULL_IDS_KEY: full_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(metadata_dict, pickle_file_handle)
    pickle_file_handle.close()
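# Hypothetical usage sketch (not part of the original module); storm IDs, valid
# times, and the file path are placeholders.
import numpy

write_ids_and_times(
    full_id_strings=['storm_0001_20110520', 'storm_0002_20110520'],
    storm_times_unix_sec=numpy.array([1305918000, 1305918300], dtype=int),
    pickle_file_name='storm_subsets/ids_and_times.p')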
def write_results(result_dict, pickle_file_name):
    """Writes results to Pickle file.

    :param result_dict: Dictionary created by `run_permutation_test`, possibly
        with additional keys.
    :param pickle_file_name: Path to output file.
    :raises: ValueError: if any required keys are not found in the dictionary.
    """

    missing_keys = list(set(REQUIRED_KEYS) - set(result_dict.keys()))

    if len(missing_keys) == 0:
        file_system_utils.mkdir_recursive_if_necessary(
            file_name=pickle_file_name)

        pickle_file_handle = open(pickle_file_name, 'wb')
        pickle.dump(result_dict, pickle_file_handle)
        pickle_file_handle.close()

        return

    error_string = (
        '\n{0:s}\nKeys listed above were expected, but not found, in '
        'dictionary.').format(str(missing_keys))

    raise ValueError(error_string)
def trim_whitespace(input_file_name,
                    output_file_name,
                    border_width_pixels=10,
                    convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Trims whitespace around edge of image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param border_width_pixels: Desired border width (whitespace).
    :param convert_exe_name: Path to executable file for ImageMagick's "convert"
        function.  If you installed ImageMagick with root access, this should be
        the default.  Regardless, the pathless file name should be just
        "convert".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    command_string = (
        '"{0:s}" "{1:s}" -trim -bordercolor White -border {2:d} "{3:s}"'
    ).format(convert_exe_name, input_file_name, border_width_pixels,
             output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return
    raise ValueError(ERROR_STRING)
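# Hypothetical usage sketch (not part of the original module): trim whitespace
# in place, leaving a 10-pixel white border.  The file path is a placeholder.
trim_whitespace(input_file_name='panels/attributes_diagram.jpg',
                output_file_name='panels/attributes_diagram.jpg',
                border_width_pixels=10)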
def write_processed_file(tornado_table, csv_file_name):
    """Writes tornado reports to CSV file.

    This is considered a "processed file," as opposed to a "raw file" (one taken
    directly from the Storm Events database).  Raw files with tornado reports
    are handled by storm_events_io.py.

    :param tornado_table: pandas DataFrame with the following columns.
    tornado_table.start_time_unix_sec: Start time.
    tornado_table.end_time_unix_sec: End time.
    tornado_table.start_latitude_deg: Latitude (deg N) of start point.
    tornado_table.start_longitude_deg: Longitude (deg E) of start point.
    tornado_table.end_latitude_deg: Latitude (deg N) of end point.
    tornado_table.end_longitude_deg: Longitude (deg E) of end point.
    tornado_table.fujita_rating: F-scale or EF-scale rating (integer from
        0...5).
    tornado_table.width_metres: Tornado width (metres).

    :param csv_file_name: Path to output file.
    """

    error_checking.assert_columns_in_dataframe(tornado_table,
                                               MANDATORY_COLUMNS)
    file_system_utils.mkdir_recursive_if_necessary(file_name=csv_file_name)

    tornado_table.to_csv(csv_file_name,
                         header=True,
                         columns=MANDATORY_COLUMNS,
                         index=False)
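# Hypothetical usage sketch (not part of the original module): one tornado
# report.  Column names follow the docstring above and are assumed to match
# MANDATORY_COLUMNS; values are placeholders.
import pandas

tornado_table = pandas.DataFrame({
    'start_time_unix_sec': [1305926400],
    'end_time_unix_sec': [1305928200],
    'start_latitude_deg': [35.1],
    'start_longitude_deg': [262.4],
    'end_latitude_deg': [35.3],
    'end_longitude_deg': [262.7],
    'fujita_rating': [4],
    'width_metres': [1200.],
})

write_processed_file(tornado_table, csv_file_name='tornadoes/2011.csv')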
def write_features_for_storm_objects(feature_table, pickle_file_name):
    """Writes features for storm objects to a Pickle file.

    :param feature_table: pandas DataFrame created by
        join_features_and_label_for_storm_objects.
    :param pickle_file_name: Path to output file.
    """

    (feature_column_names, regression_label_column_name,
     classification_label_column_name) = check_feature_table(
         feature_table, require_storm_objects=True)
    distance_buffer_column_names = tracking_io.get_distance_buffer_columns(
        feature_table)
    columns_to_write = (STORM_TO_WIND_COLUMNS_TO_KEEP + feature_column_names +
                        distance_buffer_column_names)

    if regression_label_column_name is not None:
        columns_to_write += [regression_label_column_name]
    if classification_label_column_name is not None:
        columns_to_write += [classification_label_column_name]

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(feature_table[columns_to_write], pickle_file_handle)
    pickle_file_handle.close()
def write_station_metadata_to_processed_file(station_metadata_table,
                                             csv_file_name):
    """Writes metadata for weather stations to file.

    This is considered a "processed file," as opposed to a "raw file".  A "raw
    file" is one taken directly from another database, in the native format of
    said database.  For examples, see
    `hfmetar_io.read_station_metadata_from_raw_file` and
    `ok_mesonet_io.read_station_metadata_from_raw_file`.

    :param station_metadata_table: pandas DataFrame with the following columns.
    station_metadata_table.station_id: String ID for station.
    station_metadata_table.station_name: Verbose name for station.
    station_metadata_table.latitude_deg: Latitude (deg N).
    station_metadata_table.longitude_deg: Longitude (deg E).
    station_metadata_table.elevation_m_asl: Elevation (metres above sea level).
    station_metadata_table.utc_offset_hours [optional]: Local time minus UTC.
    :param csv_file_name: Path to output file.
    """

    error_checking.assert_columns_in_dataframe(
        station_metadata_table, REQUIRED_STATION_METADATA_COLUMNS)

    file_system_utils.mkdir_recursive_if_necessary(file_name=csv_file_name)
    station_metadata_table.to_csv(csv_file_name,
                                  header=True,
                                  columns=STATION_METADATA_COLUMNS,
                                  index=False)
def unzip_tar(tar_file_name, target_directory_name=None,
              file_and_dir_names_to_unzip=None):
    """Unzips tar file.

    :param tar_file_name: Path to input file.
    :param target_directory_name: Path to output directory.
    :param file_and_dir_names_to_unzip: List of files and directories to extract
        from the tar file.  Each list element should be a relative path inside
        the tar file.  After unzipping, the same relative path will exist inside
        `target_directory_name`.
    :raises: ValueError: if the Unix command fails.
    """

    error_checking.assert_is_string(tar_file_name)
    error_checking.assert_is_string_list(file_and_dir_names_to_unzip)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=target_directory_name)

    unix_command_string = 'tar -C "{0:s}" -xvf "{1:s}"'.format(
        target_directory_name, tar_file_name)
    for this_relative_path in file_and_dir_names_to_unzip:
        unix_command_string += ' "' + this_relative_path + '"'

    exit_code = os.system(unix_command_string)
    if exit_code != 0:
        raise ValueError('\nUnix command failed (log messages shown above '
                         'should explain why).')
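# Hypothetical usage sketch (not part of the original module): extract one
# directory and one file from a tar archive.  All paths are placeholders; each
# list element is a path relative to the root of the archive.
unzip_tar(
    tar_file_name='downloads/gridrad_2011-05-20.tar',
    target_directory_name='gridrad/2011-05-20',
    file_and_dir_names_to_unzip=['reflectivity', 'metadata.txt'])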
def write_pmm_file(pickle_file_name,
                   mean_denorm_predictor_matrices,
                   mean_saliency_matrices,
                   model_file_name,
                   non_pmm_file_name,
                   pmm_max_percentile_level,
                   mean_sounding_pressures_pa=None):
    """Writes composite saliency map to Pickle file.

    The composite should be created by probability-matched means (PMM).

    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param mean_denorm_predictor_matrices: See doc for
        `_check_in_and_out_matrices`.
    :param mean_saliency_matrices: Same.
    :param model_file_name: Path to model that created saliency maps (readable
        by `cnn.read_model`).
    :param non_pmm_file_name: Path to standard saliency file (containing
        non-composited saliency maps).
    :param pmm_max_percentile_level: Max percentile level for PMM.
    :param mean_sounding_pressures_pa: length-H numpy array of PMM-composited
        sounding pressures.  Needed only if the model is trained with soundings
        but without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string(non_pmm_file_name)
    error_checking.assert_is_geq(pmm_max_percentile_level, 90.)
    error_checking.assert_is_leq(pmm_max_percentile_level, 100.)

    _check_in_and_out_matrices(
        predictor_matrices=mean_denorm_predictor_matrices,
        num_examples=None,
        saliency_matrices=mean_saliency_matrices)

    if mean_sounding_pressures_pa is not None:
        num_heights = mean_denorm_predictor_matrices[-1].shape[-2]
        these_expected_dim = numpy.array([num_heights], dtype=int)

        error_checking.assert_is_geq_numpy_array(mean_sounding_pressures_pa,
                                                 0.)
        error_checking.assert_is_numpy_array(
            mean_sounding_pressures_pa, exact_dimensions=these_expected_dim)

    mean_saliency_dict = {
        MEAN_PREDICTOR_MATRICES_KEY: mean_denorm_predictor_matrices,
        MEAN_SALIENCY_MATRICES_KEY: mean_saliency_matrices,
        MODEL_FILE_KEY: model_file_name,
        NON_PMM_FILE_KEY: non_pmm_file_name,
        PMM_MAX_PERCENTILE_KEY: pmm_max_percentile_level,
        MEAN_SOUNDING_PRESSURES_KEY: mean_sounding_pressures_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(mean_saliency_dict, pickle_file_handle)
    pickle_file_handle.close()
def _run(input_dir_name, output_dir_name):
    """Creates figure showing overall model evaluation.

    This is effectively the main method.

    :param input_dir_name: See documentation at top of file.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    panel_file_names = [
        '{0:s}/{1:s}'.format(input_dir_name, p)
        for p in PATHLESS_INPUT_FILE_NAMES
    ]
    resized_panel_file_names = [
        '{0:s}/{1:s}'.format(output_dir_name, p)
        for p in PATHLESS_INPUT_FILE_NAMES
    ]

    letter_label = None

    for i in range(len(panel_file_names)):
        print('Resizing panel and saving to: "{0:s}"...'.format(
            resized_panel_file_names[i]))

        imagemagick_utils.trim_whitespace(
            input_file_name=panel_file_names[i],
            output_file_name=resized_panel_file_names[i])

        if letter_label is None:
            letter_label = 'a'
        else:
            letter_label = chr(ord(letter_label) + 1)

        _overlay_text(image_file_name=resized_panel_file_names[i],
                      x_offset_from_left_px=0,
                      y_offset_from_top_px=TITLE_FONT_SIZE,
                      text_string='({0:s})'.format(letter_label))
        imagemagick_utils.resize_image(
            input_file_name=resized_panel_file_names[i],
            output_file_name=resized_panel_file_names[i],
            output_size_pixels=PANEL_SIZE_PX)

    concat_figure_file_name = '{0:s}/overall_evaluation.jpg'.format(
        output_dir_name)
    print(
        'Concatenating panels to: "{0:s}"...'.format(concat_figure_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=resized_panel_file_names,
        output_file_name=concat_figure_file_name,
        num_panel_rows=NUM_PANEL_ROWS,
        num_panel_columns=NUM_PANEL_COLUMNS)
    imagemagick_utils.resize_image(input_file_name=concat_figure_file_name,
                                   output_file_name=concat_figure_file_name,
                                   output_size_pixels=CONCAT_FIGURE_SIZE_PX)
def write_ensembled_predictions(pickle_file_name, class_probability_matrix,
                                valid_times_unix_sec, narr_mask_matrix,
                                prediction_dir_name_by_model, model_weights):
    """Writes ensembled predictions to Pickle file.

    An "ensembled prediction" is an ensemble of gridded predictions from two or
    more NFA models.

    T = number of time steps
    M = number of rows in grid
    N = number of columns in grid
    C = number of classes

    :param pickle_file_name: Path to output file.
    :param class_probability_matrix: T-by-M-by-N-by-C numpy array of class
        probabilities.
    :param valid_times_unix_sec: length-T numpy array of time steps.
    :param narr_mask_matrix: See doc for `write_gridded_predictions`.
    :param prediction_dir_name_by_model: See doc for `check_ensemble_metadata`.
    :param model_weights: Same.
    """

    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)
    error_checking.assert_is_numpy_array(class_probability_matrix,
                                         num_dimensions=4)

    ml_utils.check_narr_mask(narr_mask_matrix)

    these_expected_dim = numpy.array(class_probability_matrix.shape[1:3],
                                     dtype=int)
    error_checking.assert_is_numpy_array(narr_mask_matrix,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(valid_times_unix_sec)

    num_times = class_probability_matrix.shape[0]
    these_expected_dim = numpy.array([num_times], dtype=int)
    error_checking.assert_is_numpy_array(valid_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    check_ensemble_metadata(
        prediction_dir_name_by_model=prediction_dir_name_by_model,
        model_weights=model_weights)

    ensemble_dict = {
        CLASS_PROBABILITIES_KEY: class_probability_matrix,
        VALID_TIMES_KEY: valid_times_unix_sec,
        NARR_MASK_KEY: narr_mask_matrix,
        MODEL_DIRECTORIES_KEY: prediction_dir_name_by_model,
        MODEL_WEIGHTS_KEY: model_weights
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(ensemble_dict, pickle_file_handle)
    pickle_file_handle.close()
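# Hypothetical usage sketch (not part of the original module) with tiny dummy
# dimensions (2 time steps, a 4-by-5 grid, 3 classes).  The mask, model
# directories, and weights are placeholders and must satisfy
# `ml_utils.check_narr_mask` and `check_ensemble_metadata` in the real module.
import numpy

class_probability_matrix = numpy.random.random(size=(2, 4, 5, 3))
class_probability_matrix /= numpy.sum(
    class_probability_matrix, axis=-1, keepdims=True)

write_ensembled_predictions(
    pickle_file_name='predictions/ensembled.p',
    class_probability_matrix=class_probability_matrix,
    valid_times_unix_sec=numpy.array([1512000000, 1512010800], dtype=int),
    narr_mask_matrix=numpy.ones((4, 5), dtype=int),
    prediction_dir_name_by_model=['nfa_model01/predictions',
                                  'nfa_model02/predictions'],
    model_weights=numpy.array([0.5, 0.5]))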
def _run(tornado_dir_name, top_gridrad_dir_name, first_spc_date_string,
         last_spc_date_string, output_dir_name):
    """Plots histograms for GridRad dataset.

    This is effectively the main method.

    :param tornado_dir_name: See documentation at top of file.
    :param top_gridrad_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    all_spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    spc_date_strings = []

    for this_spc_date_string in all_spc_date_strings:
        this_gridrad_file_name = _find_gridrad_file_for_date(
            top_gridrad_dir_name=top_gridrad_dir_name,
            spc_date_string=this_spc_date_string)

        if this_gridrad_file_name is None:
            continue

        spc_date_strings.append(this_spc_date_string)

    first_year, last_year = _spc_dates_to_years(spc_date_strings)
    tornado_table = _read_tornado_reports(tornado_dir_name=tornado_dir_name,
                                          first_year=first_year,
                                          last_year=last_year)
    print(SEPARATOR_STRING)

    num_days = len(spc_date_strings)
    num_tornadoes_by_day = numpy.full(num_days, -1, dtype=int)

    for i in range(num_days):
        num_tornadoes_by_day[i] = _get_num_tornadoes_in_day(
            tornado_table=tornado_table, spc_date_string=spc_date_strings[i])

        print('Number of tornadoes on SPC date "{0:s}" = {1:d}'.format(
            spc_date_strings[i], num_tornadoes_by_day[i]))

    print(SEPARATOR_STRING)

    _plot_tornado_histogram(
        num_tornadoes_by_day=num_tornadoes_by_day,
        output_file_name='{0:s}/tornado_histogram.jpg'.format(output_dir_name))

    _plot_month_histogram(
        spc_date_strings=spc_date_strings,
        output_file_name='{0:s}/month_histogram.jpg'.format(output_dir_name))
def write_model_metadata(pickle_file_name,
                         metadata_dict,
                         training_option_dict,
                         list_of_layer_operation_dicts=None):
    """Writes metadata for CNN to Pickle file.

    :param pickle_file_name: Path to output file.
    :param metadata_dict: Dictionary with the following keys.
    metadata_dict['target_name']: Name of target variable (must be accepted by
        `labels.column_name_to_label_params`).
    metadata_dict['num_epochs']: Number of epochs.
    metadata_dict['num_training_batches_per_epoch']: Number of training batches
        in each epoch.
    metadata_dict['num_validation_batches_per_epoch']: Number of validation
        batches in each epoch.
    metadata_dict['monitor_string']: See doc for `_get_checkpoint_object`.
    metadata_dict['weight_loss_function']: See doc for `_check_training_args`.
    metadata_dict['use_2d3d_convolution']: Boolean flag.  If True, the net
        convolves over both 2-D and 3-D radar images, so was trained with
        `train_cnn_2d3d_myrorss`.  If False, the net convolves over only 2-D or
        only 3-D images, so was trained with `train_cnn_2d_or_3d`.
    metadata_dict['validation_file_names']: See doc for `train_cnn_2d_or_3d` or
        `train_cnn_2d3d_myrorss`.
    metadata_dict['first_validn_time_unix_sec']: Same.
    metadata_dict['last_validn_time_unix_sec']: Same.

    :param training_option_dict: See doc for
        `training_validation_io.example_generator_2d_or_3d` or
        `training_validation_io.example_generator_2d3d_myrorss`.
    :param list_of_layer_operation_dicts: List of dictionaries, representing
        layer operations used to reduce 3-D radar images to 2-D.  See doc for
        `input_examples.reduce_examples_3d_to_2d`.
    :raises: ValueError: if any of the keys listed above are missing from
        `metadata_dict`.
    """

    orig_training_option_dict = training_option_dict.copy()
    training_option_dict = trainval_io.DEFAULT_OPTION_DICT.copy()
    training_option_dict.update(orig_training_option_dict)

    metadata_dict.update({TRAINING_OPTION_DICT_KEY: training_option_dict})
    metadata_dict.update({LAYER_OPERATIONS_KEY: list_of_layer_operation_dicts})

    missing_keys = list(set(METADATA_KEYS) - set(metadata_dict.keys()))

    if len(missing_keys):
        error_string = (
            'The following keys are missing from `metadata_dict`.\n{0:s}'
        ).format(str(missing_keys))

        raise ValueError(error_string)

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(metadata_dict, pickle_file_handle)
    pickle_file_handle.close()
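# Hypothetical usage sketch (not part of the original module).  The string keys
# follow the docstring above and are assumed to match METADATA_KEYS; every value
# is a placeholder.
metadata_dict = {
    'target_name': 'example_target_name',
    'num_epochs': 100,
    'num_training_batches_per_epoch': 32,
    'num_validation_batches_per_epoch': 16,
    'monitor_string': 'val_loss',
    'weight_loss_function': False,
    'use_2d3d_convolution': False,
    'validation_file_names': ['examples/validation_examples.nc'],
    'first_validn_time_unix_sec': 1509494400,
    'last_validn_time_unix_sec': 1512086399,
}

write_model_metadata(
    pickle_file_name='models/cnn/model_metadata.p',
    metadata_dict=metadata_dict,
    training_option_dict={},  # missing options are filled from DEFAULT_OPTION_DICT
    list_of_layer_operation_dicts=None)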
def write_processed_file(storm_object_table, pickle_file_name):
    """Writes tracking data to file.

    This file should contain both polygons and track statistics for one time
    step and one tracking scale.

    P = number of grid points in a given storm object

    :param storm_object_table: pandas DataFrame with the following mandatory
        columns.  May also contain distance buffers created by
        make_buffers_around_polygons.  Each row is one storm object.
    storm_object_table.storm_id: String ID for storm cell.
    storm_object_table.unix_time_sec: Valid time.
    storm_object_table.spc_date_unix_sec: SPC date.
    storm_object_table.tracking_start_time_unix_sec: Start time for tracking
        period.
    storm_object_table.tracking_end_time_unix_sec: End time for tracking period.
    storm_object_table.age_sec: Age of storm cell (seconds).
    storm_object_table.east_velocity_m_s01: Eastward velocity of storm cell
        (metres per second).
    storm_object_table.north_velocity_m_s01: Northward velocity of storm cell
        (metres per second).
    storm_object_table.centroid_lat_deg: Latitude at centroid of storm object
        (deg N).
    storm_object_table.centroid_lng_deg: Longitude at centroid of storm object
        (deg E).
    storm_object_table.grid_point_latitudes_deg: length-P numpy array with
        latitudes (deg N) of grid points in storm object.
    storm_object_table.grid_point_longitudes_deg: length-P numpy array with
        longitudes (deg E) of grid points in storm object.
    storm_object_table.grid_point_rows: length-P numpy array with row indices
        (integers) of grid points in storm object.
    storm_object_table.grid_point_columns: length-P numpy array with column
        indices (integers) of grid points in storm object.
    storm_object_table.polygon_object_latlng: Instance of
        `shapely.geometry.Polygon`, with vertices in lat-long coordinates.
    storm_object_table.polygon_object_rowcol: Instance of
        `shapely.geometry.Polygon`, with vertices in row-column coordinates.
    :param pickle_file_name: Path to output file.
    """

    distance_buffer_column_names = get_distance_buffer_columns(
        storm_object_table)
    if distance_buffer_column_names is None:
        distance_buffer_column_names = []

    columns_to_write = MANDATORY_COLUMNS + distance_buffer_column_names
    best_track_column_present_flags = numpy.array(
        [c in list(storm_object_table) for c in BEST_TRACK_COLUMNS])
    if numpy.all(best_track_column_present_flags):
        columns_to_write += BEST_TRACK_COLUMNS

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(storm_object_table[columns_to_write], pickle_file_handle)
    pickle_file_handle.close()
def download_file_via_passwordless_ssh(host_name=None,
                                       user_name=None,
                                       remote_file_name=None,
                                       local_file_name=None,
                                       raise_error_if_fails=True):
    """Downloads file via passwordless SSH.
    For this to work, the remote machine (from which you are downloading) must
    have the RSA key of the local machine.  See the following page for
    instructions on sharing RSA keys: http://www.linuxproblem.org/art_9.html
    :param host_name: Name of remote machine (example: "thunderhoser.ou.edu").
    :param user_name: User name on remote machine (example: "thunderhoser").
    :param remote_file_name: File path on remote machine (where the file will be
        downloaded from).
    :param local_file_name: File path on local machine (where the file will be
        stored).
    :param raise_error_if_fails: Boolean flag.  If raise_error_if_fails = True
        and download fails, will raise an error.
    :return: local_file_name: If raise_error_if_fails = False and download
        failed, this will be None.  Otherwise, this will be the same as input.
    :raises: ValueError: if download failed and raise_error_if_fails = True.
    """

    # TODO(thunderhoser): Handle exceptions more intelligently.  Currently, if
    # the download fails, this method does not know why it failed.  If the
    # download failed because the file does not exist, this is less severe than
    # if it failed because we can't login to the remote machine.

    error_checking.assert_is_string(host_name)
    error_checking.assert_is_string(user_name)
    error_checking.assert_is_string(remote_file_name)
    error_checking.assert_is_string(local_file_name)
    error_checking.assert_is_boolean(raise_error_if_fails)

    file_system_utils.mkdir_recursive_if_necessary(file_name=local_file_name)

    unix_command_string = (
        'LD_LIBRARY_PATH= rsync -rv -e "{0:s}" {1:s}@{2:s}:"{3:s}" "{4:s}"'
    ).format(SSH_ARG_STRING, user_name, host_name, remote_file_name,
             local_file_name)

    devnull_handle = open(os.devnull, 'w')
    subprocess.call(unix_command_string,
                    shell=True,
                    stdout=devnull_handle,
                    stderr=devnull_handle)

    if not os.path.isfile(local_file_name):
        info_string = ('Download failed.  Local file expected at: ' +
                       local_file_name)
        if raise_error_if_fails:
            raise ValueError(info_string)
        else:
            warnings.warn(info_string)
            local_file_name = None

    return local_file_name
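# Hypothetical usage sketch (not part of the original module); host name and
# user name come from the docstring examples, and both file paths are
# placeholders.
local_file_name = download_file_via_passwordless_ssh(
    host_name='thunderhoser.ou.edu',
    user_name='thunderhoser',
    remote_file_name='/data/radar/2011-05-20/reflectivity.nc',
    local_file_name='downloads/reflectivity_2011-05-20.nc',
    raise_error_if_fails=False)

if local_file_name is None:
    print('Download failed; continuing without the file.')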
def _run(input_file_name, plot_soundings, allow_whitespace, plot_panel_names,
         add_titles, label_colour_bars, colour_bar_length, output_dir_name):
    """Plots PMM composite over many examples (storm objects).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param plot_soundings: Same.
    :param allow_whitespace: Same.
    :param plot_panel_names: Same.
    :param add_titles: Same.
    :param label_colour_bars: Same.
    :param colour_bar_length: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    pickle_file_handle = open(input_file_name, 'rb')
    input_dict = pickle.load(pickle_file_handle)
    pickle_file_handle.close()

    mean_predictor_matrices = input_dict[MEAN_PREDICTOR_MATRICES_KEY]
    for i in range(len(mean_predictor_matrices)):
        mean_predictor_matrices[i] = numpy.expand_dims(
            mean_predictor_matrices[i], axis=0
        )

    model_file_name = input_dict[MODEL_FILE_KEY]
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY][
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY
    ] = False

    mean_sounding_pressures_pascals = input_dict[MEAN_SOUNDING_PRESSURES_KEY]
    sounding_pressure_matrix_pascals = numpy.reshape(
        mean_sounding_pressures_pascals,
        (1, len(mean_sounding_pressures_pascals))
    )

    plot_input_examples.plot_examples(
        list_of_predictor_matrices=mean_predictor_matrices,
        model_metadata_dict=model_metadata_dict, pmm_flag=True,
        output_dir_name=output_dir_name, plot_soundings=plot_soundings,
        sounding_pressure_matrix_pascals=sounding_pressure_matrix_pascals,
        allow_whitespace=allow_whitespace, plot_panel_names=plot_panel_names,
        add_titles=add_titles, label_colour_bars=label_colour_bars,
        colour_bar_length=colour_bar_length)
def write_results(result_dict, pickle_file_name):
    """Writes results to Pickle file.

    :param result_dict: Dictionary created by `run_sfs`.
    :param pickle_file_name: Path to output file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(result_dict, pickle_file_handle)
    pickle_file_handle.close()
def write_model_for_each_class(model_object_by_class, pickle_file_name):
    """Writes models to Pickle file.

    :param model_object_by_class: See documentation for
        `train_model_for_each_class`.
    :param pickle_file_name: Path to output file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(model_object_by_class, pickle_file_handle)
    pickle_file_handle.close()
def _plot_kernel():
    """Plots convolutional kernel.

    J = number of rows in kernel
    K = number of columns in kernel

    :return: kernel_matrix: Kernel as J-by-K numpy array.
    """

    kernel_matrix = numpy.random.choice(a=POSSIBLE_KERNEL_VALUES,
                                        size=(NUM_ROWS_IN_KERNEL,
                                              NUM_COLUMNS_IN_KERNEL),
                                        replace=True)

    dummy_matrix = numpy.full((NUM_ROWS_IN_KERNEL, NUM_COLUMNS_IN_KERNEL),
                              HIGHLIGHTED_VALUE)

    _, axes_object = pyplot.subplots(1,
                                     1,
                                     figsize=(FIGURE_WIDTH_INCHES,
                                              FIGURE_HEIGHT_INCHES))
    pyplot.imshow(dummy_matrix,
                  cmap=COLOUR_MAP_OBJECT,
                  vmin=HIGHLIGHTED_VALUE - 1,
                  vmax=HIGHLIGHTED_VALUE,
                  axes=axes_object,
                  origin='upper')
    pyplot.xticks([], [])
    pyplot.yticks([], [])

    for i in range(kernel_matrix.shape[1]):
        for j in range(kernel_matrix.shape[0]):
            axes_object.text(i,
                             j,
                             '{0:.1f}'.format(kernel_matrix[j, i]),
                             fontsize=OVERLAY_FONT_SIZE,
                             color=MAIN_COLOUR,
                             horizontalalignment='center',
                             verticalalignment='center')

    plotting_utils.annotate_axes(axes_object=axes_object,
                                 annotation_string='(b)',
                                 font_colour=ANNOTATION_COLOUR)

    print('Saving figure to: "{0:s}"...'.format(KERNEL_FILE_NAME))
    file_system_utils.mkdir_recursive_if_necessary(file_name=KERNEL_FILE_NAME)
    pyplot.savefig(KERNEL_FILE_NAME, dpi=OUTPUT_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(input_file_name=KERNEL_FILE_NAME,
                                      output_file_name=KERNEL_FILE_NAME)

    return kernel_matrix
def _extract_single_field_to_file(grib_file_name,
                                  grib1_field_name=None,
                                  output_file_name=None,
                                  wgrib_exe_name=WGRIB_EXE_NAME_DEFAULT,
                                  wgrib2_exe_name=WGRIB2_EXE_NAME_DEFAULT,
                                  raise_error_if_fails=True):
    """Extracts single field from grib1 or grib2 file; writes to text file.

    A "single field" is one variable at one time step and all grid cells.

    :param grib_file_name: Path to input (grib1 or grib2) file.
    :param grib1_field_name: Field name in grib1 format (example: 500-mb height
        is "HGT:500 mb").
    :param output_file_name: Path to output file.
    :param wgrib_exe_name: Path to wgrib executable.
    :param wgrib2_exe_name: Path to wgrib2 executable.
    :param raise_error_if_fails: Boolean flag.  If command fails and
        raise_error_if_fails = True, will raise an error.
    :return: success: Boolean flag.  If command succeeded, this is True.  If
        command failed and raise_error_if_fails = False, this is False.
    :raises: OSError: if command fails and raise_error_if_fails = True.
    """

    grib_file_type = _get_file_type(grib_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    if grib_file_type == GRIB1_FILE_TYPE:
        command_string = ('"' + wgrib_exe_name + '" "' + grib_file_name +
                          '" -s | grep -w "' + grib1_field_name + '" | "' +
                          wgrib_exe_name + '" -i "' + grib_file_name +
                          '" -text -nh -o "' + output_file_name + '"')
    else:
        command_string = ('"' + wgrib2_exe_name + '" "' + grib_file_name +
                          '" -s | grep -w "' +
                          _field_name_grib1_to_grib2(grib1_field_name) +
                          '" | "' + wgrib2_exe_name + '" -i "' +
                          grib_file_name + '" -no_header -text "' +
                          output_file_name + '"')

    try:
        subprocess.call(command_string, shell=True)
    except OSError as this_exception:
        if raise_error_if_fails:
            raise

        warn_string = (
            '\n\n' + command_string +
            '\n\nCommand (shown above) failed (details shown below).\n\n' +
            str(this_exception))
        warnings.warn(warn_string)
        return False

    return True
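# Hypothetical usage sketch (not part of the original module): extract 500-mb
# height (the grib1 field name from the docstring) to a headerless text file.
# Both file paths are placeholders.
success = _extract_single_field_to_file(
    grib_file_name='narr/narr-a_221_20110520_0000_000.grb',
    grib1_field_name='HGT:500 mb',
    output_file_name='narr/hgt_500mb_20110520_0000.txt',
    raise_error_if_fails=False)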
def write_model(model_object, pickle_file_name):
    """Writes model to Pickle file.

    :param model_object: Instance (preferably trained) of
        `xgboost.XGBClassifier`.
    :param pickle_file_name: Path to output file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(model_object, pickle_file_handle)
    pickle_file_handle.close()
def _run():
    """Makes schema for storm-velocity estimation.

    This is effectively the main method.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=OUTPUT_FILE_NAME)

    storm_object_table = _create_tracking_data()

    _plot_schema(storm_object_table=storm_object_table,
                 output_file_name=OUTPUT_FILE_NAME)