def write_file(
        pickle_file_name, activation_matrix, storm_ids, storm_times_unix_sec,
        model_file_name, component_type_string, target_class=None,
        layer_name=None, neuron_index_matrix=None, channel_indices=None):
    """Writes activations to Pickle file.

    Two objects are pickled to the file, in this order: the activation matrix,
    then a dictionary with all the metadata.

    E = number of examples (storm objects)
    C = number of model components (classes, neurons, or channels) for which
        activations were computed

    :param pickle_file_name: Path to output file.
    :param activation_matrix: E-by-C numpy array of activations, where
        activation_matrix[i, j] = activation of the [j]th model component for
        the [i]th example.
    :param storm_ids: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model.
    :param component_type_string: See doc for `check_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_index_matrix: Same.
    :param channel_indices: Same.
    """

    # `check_metadata` validates the component description and returns C.
    num_components = check_metadata(
        component_type_string=component_type_string,
        target_class=target_class, layer_name=layer_name,
        neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)

    error_checking.assert_is_string(model_file_name)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), num_dimensions=1)
    num_examples = len(storm_ids)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=numpy.array([num_examples]))

    error_checking.assert_is_numpy_array_without_nan(activation_matrix)
    error_checking.assert_is_numpy_array(
        activation_matrix,
        exact_dimensions=numpy.array([num_examples, num_components]))

    metadata_dict = {
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        NEURON_INDICES_KEY: neuron_index_matrix,
        CHANNEL_INDICES_KEY: channel_indices,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(activation_matrix, pickle_file_handle)
        pickle.dump(metadata_dict, pickle_file_handle)
def _plot_feature_map_after_conv(feature_matrix):
    """Plots new feature map (after convolution).

    The figure is saved to `AFTER_CONV_FILE_NAME` and then trimmed in place
    with `imagemagick_utils.trim_whitespace`.

    M = number of rows in grid
    N = number of columns in grid

    :param feature_matrix: Feature map as M-by-N numpy array.
    :return: feature_matrix: The input array, returned as is.
    """

    # Background image: the top-left 2-by-2 window holds `HIGHLIGHTED_VALUE`;
    # every other grid cell is NaN.
    dummy_matrix = numpy.full(feature_matrix.shape, numpy.nan)
    dummy_matrix[:2, :2] = HIGHLIGHTED_VALUE

    _, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    pyplot.imshow(
        dummy_matrix, cmap=COLOUR_MAP_OBJECT, vmin=HIGHLIGHTED_VALUE - 1,
        vmax=HIGHLIGHTED_VALUE, axes=axes_object, origin='upper')

    pyplot.xticks([], [])
    pyplot.yticks([], [])

    # Overlay each value as text.  i = column (x-coord), j = row (y-coord).
    for i in range(feature_matrix.shape[1]):
        for j in range(feature_matrix.shape[0]):
            # `+ 0.` produces a new object instead of handing the module-level
            # constant itself to matplotlib (presumably a numpy array --
            # TODO confirm).
            if i == j == 1:
                this_colour = SPECIAL_COLOUR + 0.
            else:
                this_colour = MAIN_COLOUR + 0.

            axes_object.text(
                i, j, '{0:.1f}'.format(feature_matrix[j, i]),
                fontsize=OVERLAY_FONT_SIZE, color=this_colour,
                horizontalalignment='center', verticalalignment='center')

    # Disabled: outline around the highlighted 2-by-2 window.
    # polygon_x_coords = numpy.array([0, 2, 2, 0, 0], dtype=float) - 0.5
    # polygon_y_coords = numpy.array([2, 2, 0, 0, 2], dtype=float) - 0.5
    # axes_object.plot(
    #     polygon_x_coords, polygon_y_coords, color=LINE_COLOUR,
    #     linewidth=LINE_WIDTH)

    plotting_utils.annotate_axes(
        axes_object=axes_object, annotation_string='(c)',
        font_colour=ANNOTATION_COLOUR)

    # Function-call form of `print` is valid in both Python 2 and Python 3
    # (the statement form is a syntax error in Python 3).
    print('Saving figure to: "{0:s}"...'.format(AFTER_CONV_FILE_NAME))

    file_system_utils.mkdir_recursive_if_necessary(
        file_name=AFTER_CONV_FILE_NAME)
    pyplot.savefig(AFTER_CONV_FILE_NAME, dpi=OUTPUT_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(
        input_file_name=AFTER_CONV_FILE_NAME,
        output_file_name=AFTER_CONV_FILE_NAME)

    return feature_matrix
def _write_intermediate_results(storm_object_table, temp_file_name):
    """Writes intermediate best-track results for a subset of storm objects.

    Only the columns listed in `INTERMEDIATE_COLUMNS` are written.

    P = number of grid points in a given storm object

    :param storm_object_table: pandas DataFrame with the following columns.
        Each row is one storm object.
    storm_object_table.storm_id: String ID for storm cell.
    storm_object_table.original_storm_id: Original ID (before best-track).
    storm_object_table.unix_time_sec: Valid time.
    storm_object_table.spc_date_unix_sec: Valid SPC date.
    storm_object_table.grid_point_latitudes_deg: length-P numpy array with
        latitudes (deg N) of grid points in storm object.
    storm_object_table.grid_point_longitudes_deg: length-P numpy array with
        longitudes (deg E) of grid points in storm object.
    storm_object_table.grid_point_rows: length-P numpy array with row indices
        (integers) of grid points in storm object.
    storm_object_table.grid_point_columns: length-P numpy array with column
        indices (integers) of grid points in storm object.
    storm_object_table.centroid_x_metres: x-coordinate of storm centroid.
    storm_object_table.centroid_y_metres: y-coordinate of storm centroid.
    storm_object_table.file_index: Array index of file containing storm object.
        This is an index into the file-name array for the given SPC date.

    :param temp_file_name: Path to intermediate file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=temp_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(temp_file_name, 'wb') as pickle_file_handle:
        pickle.dump(
            storm_object_table[INTERMEDIATE_COLUMNS], pickle_file_handle)
def write_to_netcdf(latitudes_deg, longitudes_deg, netcdf_file_name):
    """Writes boundary to NetCDF file.

    The file gets one dimension (the boundary vertices) and two 32-bit-float
    variables (latitudes and longitudes) along that dimension.

    :param latitudes_deg: See doc for `_check_boundary`.
    :param longitudes_deg: Same.
    :param netcdf_file_name: Path to output file.
    """

    # `_check_boundary` returns the longitudes that are actually written; the
    # latitudes are written exactly as passed in.
    longitudes_deg = _check_boundary(
        latitudes_deg=latitudes_deg, longitudes_deg=longitudes_deg)

    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')

    # try/finally ensures the dataset is closed even if a write fails.
    try:
        num_points = len(latitudes_deg)
        dataset_object.createDimension(NETCDF_VERTEX_DIMENSION_KEY, num_points)

        dataset_object.createVariable(
            NETCDF_LATITUDES_KEY, datatype=numpy.float32,
            dimensions=NETCDF_VERTEX_DIMENSION_KEY)
        dataset_object.variables[NETCDF_LATITUDES_KEY][:] = latitudes_deg

        dataset_object.createVariable(
            NETCDF_LONGITUDES_KEY, datatype=numpy.float32,
            dimensions=NETCDF_VERTEX_DIMENSION_KEY)
        dataset_object.variables[NETCDF_LONGITUDES_KEY][:] = longitudes_deg
    finally:
        dataset_object.close()
def _run(output_file_name):
    """Draws the Laplacian kernel (used for edge-detector test) and saves it.

    One panel is drawn per slice along the last axis of `KERNEL_MATRIX_3D`,
    all in a single row.  This is effectively the main method.

    :param output_file_name: See documentation at top of file.
    """

    panel_count = KERNEL_MATRIX_3D.shape[-1]

    figure_object, axes_object_matrix = plotting_utils.create_paneled_figure(
        num_rows=1, num_columns=panel_count,
        horizontal_spacing=0.1, vertical_spacing=0.1,
        shared_x_axis=False, shared_y_axis=False,
        keep_aspect_ratio=True)

    for height_index in range(panel_count):
        _plot_kernel_one_height(
            kernel_matrix_2d=KERNEL_MATRIX_3D[..., height_index],
            axes_object=axes_object_matrix[0, height_index])

    # Titles are fixed for the first three panels.
    panel_titles = ('Bottom height', 'Middle height', 'Top height')
    for panel_index, this_title in enumerate(panel_titles):
        axes_object_matrix[0, panel_index].set_title(this_title)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    print('Saving figure to: "{0:s}"...'.format(output_file_name))

    figure_object.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI,
        pad_inches=0, bbox_inches='tight')
    pyplot.close(figure_object)
def write_narr_grids_to_file(frontal_grid_table, pickle_file_name):
    """Writes one or more NARR* grids to file.

    * NARR = North American Regional Reanalysis

    Only the columns listed in `REQUIRED_GRID_COLUMNS` are written.

    :param frontal_grid_table: pandas DataFrame with the following columns.
        Each row is one valid time.
    frontal_grid_table.unix_time_sec: Valid time.
    frontal_grid_table.warm_front_row_indices: length-W numpy array with row
        indices (integers) of grid cells intersected by a warm front.
    frontal_grid_table.warm_front_column_indices: Same as above, except for
        columns.
    frontal_grid_table.cold_front_row_indices: length-C numpy array with row
        indices (integers) of grid cells intersected by a cold front.
    frontal_grid_table.cold_front_column_indices: Same as above, except for
        columns.

    :param pickle_file_name: Path to output file.
    """

    error_checking.assert_columns_in_dataframe(
        frontal_grid_table, REQUIRED_GRID_COLUMNS)

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(
            frontal_grid_table[REQUIRED_GRID_COLUMNS], pickle_file_handle)
def _plot_scores_as_grid(
        score_matrix, colour_map_object, min_colour_value, max_colour_value,
        x_tick_labels, x_axis_label, x_axis_text_colour, y_tick_labels,
        y_axis_label, y_axis_text_colour, title_string, output_file_name):
    """Plots model scores as 2-D grid.

    The figure is saved to `output_file_name` and then trimmed in place with
    `imagemagick_utils.trim_whitespace`.

    M = number of rows in grid
    N = number of columns in grid

    :param score_matrix: M-by-N numpy array of model scores.
    :param colour_map_object: Instance of `matplotlib.colors.ListedColormap`.
    :param min_colour_value: Minimum value in colour map.
    :param max_colour_value: Max value in colour map.
    :param x_tick_labels: length-N list of string labels.
    :param x_axis_label: String label for the entire x-axis.
    :param x_axis_text_colour: Colour for all text labels along x-axis.
    :param y_tick_labels: length-M list of string labels.
    :param y_axis_label: String label for the entire y-axis.
    :param y_axis_text_colour: Colour for all text labels along y-axis.
    :param title_string: Figure title.
    :param output_file_name: Path to output file (the figure will be saved
        here).
    """

    _, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    # `+ 0.` makes a new array, so replacing NaN with zero below does not
    # modify the caller's `score_matrix`.
    score_matrix_to_plot = score_matrix + 0.
    score_matrix_to_plot[numpy.isnan(score_matrix_to_plot)] = 0.

    pyplot.imshow(
        score_matrix_to_plot, cmap=colour_map_object, origin='lower',
        vmin=min_colour_value, vmax=max_colour_value)

    # One tick per grid column, at integer positions 0...(N - 1).
    x_tick_values = numpy.linspace(
        0, score_matrix_to_plot.shape[1] - 1,
        num=score_matrix_to_plot.shape[1], dtype=float)
    pyplot.xticks(x_tick_values, x_tick_labels, color=x_axis_text_colour)
    pyplot.xlabel(x_axis_label, color=x_axis_text_colour)

    # One tick per grid row, at integer positions 0...(M - 1).
    y_tick_values = numpy.linspace(
        0, score_matrix_to_plot.shape[0] - 1,
        num=score_matrix_to_plot.shape[0], dtype=float)
    pyplot.yticks(y_tick_values, y_tick_labels, color=y_axis_text_colour)
    pyplot.ylabel(y_axis_label, color=y_axis_text_colour)

    pyplot.title(title_string)

    plotting_utils.add_linear_colour_bar(
        axes_object_or_list=axes_object,
        values_to_colour=score_matrix_to_plot, colour_map=colour_map_object,
        colour_min=min_colour_value, colour_max=max_colour_value,
        orientation='vertical', extend_min=True, extend_max=True,
        font_size=FONT_SIZE)

    # Function-call form of `print` is valid in both Python 2 and Python 3
    # (the statement form is a syntax error in Python 3).
    print('Saving figure to: "{0:s}"...'.format(output_file_name))

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    pyplot.savefig(output_file_name, dpi=FIGURE_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(
        input_file_name=output_file_name, output_file_name=output_file_name)
def write_processed_file(wind_table, csv_file_name=None, write_mode='w'):
    """Writes wind observations to file.

    This is considered a "processed file," as opposed to a "raw file".  A "raw
    file" is one taken directly from another database, in the native format of
    said database.  For examples, see `madis_io.read_winds_from_raw_file` and
    `ok_mesonet_io.read_winds_from_raw_file`.

    The header row is written when the file does not exist yet or when
    `write_mode` contains "w"; otherwise it is omitted.

    :param wind_table: pandas DataFrame with the following columns.
    wind_table.station_id: String ID for station.
    wind_table.station_name: Verbose name for station.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.elevation_m_asl: Elevation (metres above sea level).
    wind_table.unix_time_sec: Valid time in Unix format.
    wind_table.u_wind_m_s01: u-wind (metres per second).
    wind_table.v_wind_m_s01: v-wind (metres per second).

    :param csv_file_name: Path to output file.
    :param write_mode: Any string accepted by the built-in method `open`.
    """

    error_checking.assert_columns_in_dataframe(wind_table, WIND_COLUMNS)
    file_system_utils.mkdir_recursive_if_necessary(file_name=csv_file_name)

    if os.path.isfile(csv_file_name):
        # Existing file: header needed only if the file will be overwritten.
        include_header = 'w' in write_mode
    else:
        include_header = True

    wind_table.to_csv(
        csv_file_name, mode=write_mode, columns=WIND_COLUMNS,
        header=include_header, index=False)
def resize_image(input_file_name, output_file_name, output_size_pixels,
                 convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Resizes image.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param output_size_pixels: Output size (positive integer).  Passed to
        ImageMagick as `-resize <output_size_pixels>@`.
        NOTE(review): per the ImageMagick geometry docs the "@" suffix means
        total area in pixels -- confirm this is the intended meaning.
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    import subprocess

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(output_size_pixels)
    error_checking.assert_is_greater(output_size_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    # Arguments are passed as a list (no shell), so paths containing quotes,
    # "$", backticks, etc. cannot break or alter the command.
    command_args = [
        convert_exe_name, input_file_name,
        '-resize', '{0:d}@'.format(output_size_pixels),
        output_file_name
    ]

    try:
        exit_code = subprocess.call(command_args)
    except OSError:
        # Executable could not be launched; report it like any other failure
        # so that callers still see ValueError.
        exit_code = -1

    if exit_code != 0:
        raise ValueError(ERROR_STRING)
def write_ids_and_times(full_id_strings, storm_times_unix_sec,
                        pickle_file_name):
    """Writes full storm IDs and valid times (minimal metadata) to Pickle file.

    The IDs and times are stored together in one pickled dictionary.

    N = number of storm objects

    :param full_id_strings: length-N list of full IDs.
    :param storm_times_unix_sec: length-N numpy array of valid times.
    :param pickle_file_name: Path to output file.
    """

    error_checking.assert_is_string_list(full_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_id_strings), num_dimensions=1)
    num_storm_objects = len(full_id_strings)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec,
        exact_dimensions=numpy.array([num_storm_objects], dtype=int)
    )

    metadata_dict = {
        FULL_IDS_KEY: full_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(metadata_dict, pickle_file_handle)
def write_results(result_dict, pickle_file_name):
    """Writes results to Pickle file.

    :param result_dict: Dictionary created by `run_permutation_test`, maybe
        with additional keys.
    :param pickle_file_name: Path to output file.
    :raises: ValueError: if any required keys are not found in the dictionary.
    """

    # Validate before touching the file system, so a bad dictionary never
    # leaves a directory or partial file behind.
    missing_keys = list(set(REQUIRED_KEYS) - set(result_dict.keys()))

    if missing_keys:
        error_string = (
            '\n{0:s}\nKeys listed above were expected, but not found, in '
            'dictionary.'
        ).format(str(missing_keys))

        raise ValueError(error_string)

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(result_dict, pickle_file_handle)
def trim_whitespace(input_file_name, output_file_name, border_width_pixels=10,
                    convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Trims whitespace around edge of image.

    Runs ImageMagick's "convert" with `-trim`, then adds a white border of the
    requested width.

    :param input_file_name: Path to input file (may be in any format handled by
        ImageMagick).
    :param output_file_name: Path to output file.
    :param border_width_pixels: Desired border width (whitespace).  Must be a
        non-negative integer.
    :param convert_exe_name: Path to executable file for ImageMagick's "convert"
        function.  If you installed ImageMagick with root access, this should be
        the default.  Regardless, the pathless file name should be just
        "convert".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    import subprocess

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    # Arguments are passed as a list (no shell), so paths containing quotes,
    # "$", backticks, etc. cannot break or alter the command.
    command_args = [
        convert_exe_name, input_file_name,
        '-trim', '-bordercolor', 'White',
        '-border', '{0:d}'.format(border_width_pixels),
        output_file_name
    ]

    try:
        exit_code = subprocess.call(command_args)
    except OSError:
        # Executable could not be launched; report it like any other failure
        # so that callers still see ValueError.
        exit_code = -1

    if exit_code != 0:
        raise ValueError(ERROR_STRING)
def write_processed_file(tornado_table, csv_file_name):
    """Writes tornado reports to CSV file.

    This is considered a "processed file," as opposed to a "raw file" (one
    taken directly from the Storm Events database).  Raw files with tornado
    reports are handled by storm_events_io.py.

    Only the columns in `MANDATORY_COLUMNS` are written, with a header row and
    without the DataFrame index.

    :param tornado_table: pandas DataFrame with the following columns.
    tornado_table.start_time_unix_sec: Start time.
    tornado_table.end_time_unix_sec: End time.
    tornado_table.start_latitude_deg: Latitude (deg N) of start point.
    tornado_table.start_longitude_deg: Longitude (deg E) of start point.
    tornado_table.end_latitude_deg: Latitude (deg N) of end point.
    tornado_table.end_longitude_deg: Longitude (deg E) of end point.
    tornado_table.fujita_rating: F-scale or EF-scale rating (integer from
        0...5).
    tornado_table.width_metres: Tornado width (metres).

    :param csv_file_name: Path to output file.
    """

    error_checking.assert_columns_in_dataframe(
        tornado_table, MANDATORY_COLUMNS)
    file_system_utils.mkdir_recursive_if_necessary(file_name=csv_file_name)

    csv_option_dict = {
        'columns': MANDATORY_COLUMNS,
        'header': True,
        'index': False,
    }
    tornado_table.to_csv(csv_file_name, **csv_option_dict)
def write_features_for_storm_objects(feature_table, pickle_file_name):
    """Writes features for storm objects to a Pickle file.

    Columns written: `STORM_TO_WIND_COLUMNS_TO_KEEP`, all feature columns, all
    distance-buffer columns, and whichever label columns (regression and/or
    classification) `check_feature_table` reports.

    :param feature_table: pandas DataFrame created by
        join_features_and_label_for_storm_objects.
    :param pickle_file_name: Path to output file.
    """

    (feature_column_names, regression_label_column_name,
     classification_label_column_name) = check_feature_table(
         feature_table, require_storm_objects=True)

    distance_buffer_column_names = tracking_io.get_distance_buffer_columns(
        feature_table)

    # Guard against a `None` return, which would make the list concatenation
    # below fail (presumably None means "no buffer columns" -- TODO confirm).
    if distance_buffer_column_names is None:
        distance_buffer_column_names = []

    # `+` builds a new list, so the `+=` below never touches the module-level
    # constant.
    columns_to_write = (
        STORM_TO_WIND_COLUMNS_TO_KEEP + feature_column_names +
        distance_buffer_column_names)

    if regression_label_column_name is not None:
        columns_to_write += [regression_label_column_name]
    if classification_label_column_name is not None:
        columns_to_write += [classification_label_column_name]

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(feature_table[columns_to_write], pickle_file_handle)
def write_station_metadata_to_processed_file(station_metadata_table,
                                             csv_file_name):
    """Writes metadata for weather stations to file.

    This is considered a "processed file," as opposed to a "raw file".  A "raw
    file" is one taken directly from another database, in the native format of
    said database.  For examples, see
    `hfmetar_io.read_station_metadata_from_raw_file` and
    `ok_mesonet_io.read_station_metadata_from_raw_file`.

    :param station_metadata_table: pandas DataFrame with the following columns.
    station_metadata_table.station_id: String ID for station.
    station_metadata_table.station_name: Verbose name for station.
    station_metadata_table.latitude_deg: Latitude (deg N).
    station_metadata_table.longitude_deg: Longitude (deg E).
    station_metadata_table.elevation_m_asl: Elevation (metres above sea level).
    station_metadata_table.utc_offset_hours [optional]: Local time minus UTC.

    :param csv_file_name: Path to output file.
    """

    error_checking.assert_columns_in_dataframe(
        station_metadata_table, REQUIRED_STATION_METADATA_COLUMNS)
    file_system_utils.mkdir_recursive_if_necessary(file_name=csv_file_name)

    # NOTE(review): validation uses `REQUIRED_STATION_METADATA_COLUMNS` but the
    # columns written are `STATION_METADATA_COLUMNS` -- presumably the latter
    # also includes the optional column; confirm behaviour when it is absent.
    csv_option_dict = {
        'columns': STATION_METADATA_COLUMNS,
        'header': True,
        'index': False,
    }
    station_metadata_table.to_csv(csv_file_name, **csv_option_dict)
def unzip_tar(tar_file_name, target_directory_name=None,
              file_and_dir_names_to_unzip=None):
    """Unzips tar file.

    Runs `tar -C <target_directory_name> -xvf <tar_file_name> <paths...>`.

    NOTE(review): despite the `None` defaults, both optional arguments appear
    to be required in practice -- the list of paths is iterated
    unconditionally and the directory is always passed to `tar -C`.

    :param tar_file_name: Path to input file.
    :param target_directory_name: Path to output directory.
    :param file_and_dir_names_to_unzip: List of files and directories to extract
        from the tar file.  Each list element should be a relative path inside
        the tar file.  After unzipping, the same relative path will exist inside
        `target_directory_name`.
    :raises: ValueError: if the Unix command fails.
    """

    import subprocess

    error_checking.assert_is_string(tar_file_name)
    error_checking.assert_is_string_list(file_and_dir_names_to_unzip)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=target_directory_name)

    # Arguments are passed as a list (no shell), so paths containing quotes,
    # "$", backticks, etc. cannot break or alter the command.
    command_args = ['tar', '-C', target_directory_name, '-xvf', tar_file_name]
    command_args.extend(file_and_dir_names_to_unzip)

    try:
        exit_code = subprocess.call(command_args)
    except OSError:
        # `tar` could not be launched; report it like any other failure so
        # that callers still see ValueError.
        exit_code = -1

    if exit_code != 0:
        raise ValueError('\nUnix command failed (log messages shown above '
                         'should explain why).')
def write_pmm_file(
        pickle_file_name, mean_denorm_predictor_matrices,
        mean_saliency_matrices, model_file_name, non_pmm_file_name,
        pmm_max_percentile_level, mean_sounding_pressures_pa=None):
    """Writes composite saliency map to Pickle file.

    The composite should be created by probability-matched means (PMM).

    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param mean_denorm_predictor_matrices: See doc for
        `_check_in_and_out_matrices`.
    :param mean_saliency_matrices: Same.
    :param model_file_name: Path to model that created saliency maps (readable
        by `cnn.read_model`).
    :param non_pmm_file_name: Path to standard saliency file (containing
        non-composited saliency maps).
    :param pmm_max_percentile_level: Max percentile level for PMM.  Must be in
        the range 90...100.
    :param mean_sounding_pressures_pa: length-H numpy array of PMM-composited
        sounding pressures.  Needed only if the model is trained with soundings
        but without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string(non_pmm_file_name)
    error_checking.assert_is_geq(pmm_max_percentile_level, 90.)
    error_checking.assert_is_leq(pmm_max_percentile_level, 100.)

    _check_in_and_out_matrices(
        predictor_matrices=mean_denorm_predictor_matrices, num_examples=None,
        saliency_matrices=mean_saliency_matrices)

    if mean_sounding_pressures_pa is not None:
        # H is read from the second-last axis of the last predictor matrix
        # (presumably the sounding matrix -- TODO confirm).
        num_heights = mean_denorm_predictor_matrices[-1].shape[-2]
        these_expected_dim = numpy.array([num_heights], dtype=int)

        error_checking.assert_is_geq_numpy_array(mean_sounding_pressures_pa, 0.)
        error_checking.assert_is_numpy_array(
            mean_sounding_pressures_pa, exact_dimensions=these_expected_dim)

    mean_saliency_dict = {
        MEAN_PREDICTOR_MATRICES_KEY: mean_denorm_predictor_matrices,
        MEAN_SALIENCY_MATRICES_KEY: mean_saliency_matrices,
        MODEL_FILE_KEY: model_file_name,
        NON_PMM_FILE_KEY: non_pmm_file_name,
        PMM_MAX_PERCENTILE_KEY: pmm_max_percentile_level,
        MEAN_SOUNDING_PRESSURES_KEY: mean_sounding_pressures_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(mean_saliency_dict, pickle_file_handle)
def _run(input_dir_name, output_dir_name):
    """Builds the multi-panel figure that shows overall model evaluation.

    Each panel is trimmed, labelled with a letter ("(a)", "(b)", ...) and
    resized; the panels are then concatenated into one image, which is resized
    as well.  This is effectively the main method.

    :param input_dir_name: See documentation at top of file.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    panel_file_names = [
        '{0:s}/{1:s}'.format(input_dir_name, p)
        for p in PATHLESS_INPUT_FILE_NAMES
    ]
    resized_panel_file_names = [
        '{0:s}/{1:s}'.format(output_dir_name, p)
        for p in PATHLESS_INPUT_FILE_NAMES
    ]

    panel_pairs = zip(panel_file_names, resized_panel_file_names)

    for panel_index, (source_name, resized_name) in enumerate(panel_pairs):
        print('Resizing panel and saving to: "{0:s}"...'.format(resized_name))

        imagemagick_utils.trim_whitespace(
            input_file_name=source_name, output_file_name=resized_name)

        # First panel gets "a", second gets "b", and so on.
        letter_label = chr(ord('a') + panel_index)

        _overlay_text(
            image_file_name=resized_name,
            x_offset_from_left_px=0, y_offset_from_top_px=TITLE_FONT_SIZE,
            text_string='({0:s})'.format(letter_label))

        imagemagick_utils.resize_image(
            input_file_name=resized_name, output_file_name=resized_name,
            output_size_pixels=PANEL_SIZE_PX)

    concat_figure_file_name = '{0:s}/overall_evaluation.jpg'.format(
        output_dir_name)
    print(
        'Concatenating panels to: "{0:s}"...'.format(concat_figure_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=resized_panel_file_names,
        output_file_name=concat_figure_file_name,
        num_panel_rows=NUM_PANEL_ROWS, num_panel_columns=NUM_PANEL_COLUMNS)

    imagemagick_utils.resize_image(
        input_file_name=concat_figure_file_name,
        output_file_name=concat_figure_file_name,
        output_size_pixels=CONCAT_FIGURE_SIZE_PX)
def write_ensembled_predictions(
        pickle_file_name, class_probability_matrix, valid_times_unix_sec,
        narr_mask_matrix, prediction_dir_name_by_model, model_weights):
    """Writes ensembled predictions to Pickle file.

    An "ensembled prediction" is an ensemble of gridded predictions from two or
    more NFA models.

    T = number of time steps
    M = number of rows in grid
    N = number of columns in grid
    C = number of classes

    :param pickle_file_name: Path to output file.
    :param class_probability_matrix: T-by-M-by-N-by-C numpy array of class
        probabilities.  All values must be in the range 0...1.
    :param valid_times_unix_sec: length-T numpy array of time steps.
    :param narr_mask_matrix: See doc for `write_gridded_predictions`.  Must be
        M-by-N.
    :param prediction_dir_name_by_model: See doc for `check_ensemble_metadata`.
    :param model_weights: Same.
    """

    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=4)

    # Mask must match the spatial (M-by-N) dimensions of the probabilities.
    ml_utils.check_narr_mask(narr_mask_matrix)
    these_expected_dim = numpy.array(
        class_probability_matrix.shape[1:3], dtype=int)
    error_checking.assert_is_numpy_array(
        narr_mask_matrix, exact_dimensions=these_expected_dim)

    # There must be one valid time per slice along the first axis.
    error_checking.assert_is_integer_numpy_array(valid_times_unix_sec)
    num_times = class_probability_matrix.shape[0]
    these_expected_dim = numpy.array([num_times], dtype=int)
    error_checking.assert_is_numpy_array(
        valid_times_unix_sec, exact_dimensions=these_expected_dim)

    check_ensemble_metadata(
        prediction_dir_name_by_model=prediction_dir_name_by_model,
        model_weights=model_weights)

    ensemble_dict = {
        CLASS_PROBABILITIES_KEY: class_probability_matrix,
        VALID_TIMES_KEY: valid_times_unix_sec,
        NARR_MASK_KEY: narr_mask_matrix,
        MODEL_DIRECTORIES_KEY: prediction_dir_name_by_model,
        MODEL_WEIGHTS_KEY: model_weights
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(ensemble_dict, pickle_file_handle)
def _run(tornado_dir_name, top_gridrad_dir_name, first_spc_date_string,
         last_spc_date_string, output_dir_name):
    """Creates histograms for the GridRad dataset.

    Two figures are written to `output_dir_name`: tornado counts per SPC date
    and SPC dates per month.  Only SPC dates for which a GridRad file is found
    are used.  This is effectively the main method.

    :param tornado_dir_name: See documentation at top of file.
    :param top_gridrad_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    all_spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    # Keep only the dates that have a GridRad file.
    spc_date_strings = [
        this_date_string for this_date_string in all_spc_date_strings
        if _find_gridrad_file_for_date(
            top_gridrad_dir_name=top_gridrad_dir_name,
            spc_date_string=this_date_string
        ) is not None
    ]

    first_year, last_year = _spc_dates_to_years(spc_date_strings)
    tornado_table = _read_tornado_reports(
        tornado_dir_name=tornado_dir_name, first_year=first_year,
        last_year=last_year)
    print(SEPARATOR_STRING)

    num_tornadoes_by_day = numpy.full(len(spc_date_strings), -1, dtype=int)

    for day_index, this_date_string in enumerate(spc_date_strings):
        num_tornadoes_by_day[day_index] = _get_num_tornadoes_in_day(
            tornado_table=tornado_table, spc_date_string=this_date_string)

        print('Number of tornadoes on SPC date "{0:s}" = {1:d}'.format(
            this_date_string, num_tornadoes_by_day[day_index]))

    print(SEPARATOR_STRING)

    tornado_figure_file_name = '{0:s}/tornado_histogram.jpg'.format(
        output_dir_name)
    _plot_tornado_histogram(
        num_tornadoes_by_day=num_tornadoes_by_day,
        output_file_name=tornado_figure_file_name)

    month_figure_file_name = '{0:s}/month_histogram.jpg'.format(
        output_dir_name)
    _plot_month_histogram(
        spc_date_strings=spc_date_strings,
        output_file_name=month_figure_file_name)
def write_model_metadata(pickle_file_name, metadata_dict, training_option_dict,
                         list_of_layer_operation_dicts=None):
    """Writes metadata for CNN to Pickle file.

    NOTE: `metadata_dict` is modified in place -- the training options and the
    layer operations are added to it before it is written.
    `training_option_dict` itself is not modified.

    :param pickle_file_name: Path to output file.
    :param metadata_dict: Dictionary with the following keys.
    metadata_dict['target_name']: Name of target variable (must be accepted by
        `labels.column_name_to_label_params`).
    metadata_dict['num_epochs']: Number of epochs.
    metadata_dict['num_training_batches_per_epoch']: Number of training batches
        in each epoch.
    metadata_dict['num_validation_batches_per_epoch']: Number of validation
        batches in each epoch.
    metadata_dict['monitor_string']: See doc for `_get_checkpoint_object`.
    metadata_dict['weight_loss_function']: See doc for `_check_training_args`.
    metadata_dict['use_2d3d_convolution']: Boolean flag.  If True, the net
        convolves over both 2-D and 3-D radar images, so was trained with
        `train_cnn_2d3d_myrorss`.  If False, the net convolves over only 2-D or
        only 3-D images, so was trained with `train_cnn_2d_or_3d`.
    metadata_dict['validation_file_names']: See doc for `train_cnn_2d_or_3d` or
        `train_cnn_2d3d_myrorss`.
    metadata_dict['first_validn_time_unix_sec']: Same.
    metadata_dict['last_validn_time_unix_sec']: Same.

    :param training_option_dict: See doc for
        `training_validation_io.example_generator_2d_or_3d` or
        `training_validation_io.example_generator_2d3d_myrorss`.
    :param list_of_layer_operation_dicts: List of dictionaries, representing
        layer operations used to reduce 3-D radar images to 2-D.  See doc for
        `input_examples.reduce_examples_3d_to_2d`.
    :raises: ValueError: if any of the aforelisted keys are missing from
        `metadata_dict`.
    """

    # Start from the default options and overlay the caller's options, working
    # on copies so that neither input dictionary of options is altered.
    orig_training_option_dict = training_option_dict.copy()
    training_option_dict = trainval_io.DEFAULT_OPTION_DICT.copy()
    training_option_dict.update(orig_training_option_dict)

    metadata_dict.update({TRAINING_OPTION_DICT_KEY: training_option_dict})
    metadata_dict.update({LAYER_OPERATIONS_KEY: list_of_layer_operation_dicts})

    missing_keys = list(set(METADATA_KEYS) - set(metadata_dict.keys()))

    if missing_keys:
        error_string = (
            'The following keys are missing from `metadata_dict`.\n{0:s}'
        ).format(str(missing_keys))

        raise ValueError(error_string)

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(metadata_dict, pickle_file_handle)
def write_processed_file(storm_object_table, pickle_file_name):
    """Writes tracking data to file.

    This file should contain both polygons and track statistics for one time
    step and one tracking scale.

    P = number of grid points in a given storm object

    :param storm_object_table: pandas DataFrame with the following mandatory
        columns.  May also contain distance buffers created by
        make_buffers_around_polygons.  Each row is one storm object.
    storm_object_table.storm_id: String ID for storm cell.
    storm_object_table.unix_time_sec: Valid time.
    storm_object_table.spc_date_unix_sec: SPC date.
    storm_object_table.tracking_start_time_unix_sec: Start time for tracking
        period.
    storm_object_table.tracking_end_time_unix_sec: End time for tracking
        period.
    storm_object_table.age_sec: Age of storm cell (seconds).
    storm_object_table.east_velocity_m_s01: Eastward velocity of storm cell
        (metres per second).
    storm_object_table.north_velocity_m_s01: Northward velocity of storm cell
        (metres per second).
    storm_object_table.centroid_lat_deg: Latitude at centroid of storm object
        (deg N).
    storm_object_table.centroid_lng_deg: Longitude at centroid of storm object
        (deg E).
    storm_object_table.grid_point_latitudes_deg: length-P numpy array with
        latitudes (deg N) of grid points in storm object.
    storm_object_table.grid_point_longitudes_deg: length-P numpy array with
        longitudes (deg E) of grid points in storm object.
    storm_object_table.grid_point_rows: length-P numpy array with row indices
        (integers) of grid points in storm object.
    storm_object_table.grid_point_columns: length-P numpy array with column
        indices (integers) of grid points in storm object.
    storm_object_table.polygon_object_latlng: Instance of
        `shapely.geometry.Polygon`, with vertices in lat-long coordinates.
    storm_object_table.polygon_object_rowcol: Instance of
        `shapely.geometry.Polygon`, with vertices in row-column coordinates.

    :param pickle_file_name: Path to output file.
    """

    distance_buffer_column_names = get_distance_buffer_columns(
        storm_object_table)
    if distance_buffer_column_names is None:
        distance_buffer_column_names = []

    # `+` builds a new list, so the in-place extension below never touches the
    # module-level MANDATORY_COLUMNS.
    columns_to_write = MANDATORY_COLUMNS + distance_buffer_column_names

    # Best-track columns are written only if every one of them is present.
    table_column_names = list(storm_object_table)
    if all(c in table_column_names for c in BEST_TRACK_COLUMNS):
        columns_to_write += BEST_TRACK_COLUMNS

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # The context manager closes the file even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(storm_object_table[columns_to_write], pickle_file_handle)
def download_file_via_passwordless_ssh(host_name=None, user_name=None,
                                       remote_file_name=None,
                                       local_file_name=None,
                                       raise_error_if_fails=True):
    """Downloads file via passwordless SSH.

    For this to work, the remote machine (from which you are downloading) must
    have the RSA key of the local machine.  See the following page for
    instructions on sharing RSA keys: http://www.linuxproblem.org/art_9.html

    :param host_name: Name of remote machine (example: "thunderhoser.ou.edu").
    :param user_name: User name on remote machine (example: "thunderhoser").
    :param remote_file_name: File path on remote machine (where the file will be
        downloaded from).
    :param local_file_name: File path on local machine (where the file will be
        stored).
    :param raise_error_if_fails: Boolean flag.  If raise_error_if_fails = True
        and download fails, will raise an error.
    :return: local_file_name: If raise_error_if_fails = False and download
        failed, this will be None.  Otherwise, this will be the same as input.
    :raises: ValueError: if download failed and raise_error_if_fails = True.
    """

    # TODO(thunderhoser): Handle exceptions more intelligently.  Currently, if
    # the download fails, this method does not know why it failed.  If the
    # download failed because the file does not exist, this is less severe than
    # if it failed because we can't login to the remote machine.

    error_checking.assert_is_string(host_name)
    error_checking.assert_is_string(user_name)
    error_checking.assert_is_string(remote_file_name)
    error_checking.assert_is_string(local_file_name)
    error_checking.assert_is_boolean(raise_error_if_fails)

    file_system_utils.mkdir_recursive_if_necessary(file_name=local_file_name)

    unix_command_string = (
        'LD_LIBRARY_PATH= rsync -rv -e "{0:s}" {1:s}@{2:s}:"{3:s}" "{4:s}"'
    ).format(SSH_ARG_STRING, user_name, host_name, remote_file_name,
             local_file_name)

    # Discard rsync's output.  The context manager guarantees that the handle
    # to os.devnull is closed once the command has finished.
    with open(os.devnull, 'w') as devnull_handle:
        subprocess.call(
            unix_command_string, shell=True, stdout=devnull_handle,
            stderr=devnull_handle)

    # Success is judged only by the presence of the local file.
    # NOTE(review): if the local file already existed before the call, a failed
    # download is not detected here.
    if not os.path.isfile(local_file_name):
        info_string = ('Download failed.  Local file expected at: ' +
                       local_file_name)

        if raise_error_if_fails:
            raise ValueError(info_string)

        warnings.warn(info_string)
        local_file_name = None

    return local_file_name
def _run(input_file_name, plot_soundings, allow_whitespace, plot_panel_names,
         add_titles, label_colour_bars, colour_bar_length, output_dir_name):
    """Plots PMM composite over many examples (storm objects).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param plot_soundings: Same.
    :param allow_whitespace: Same.
    :param plot_panel_names: Same.
    :param add_titles: Same.
    :param label_colour_bars: Same.
    :param colour_bar_length: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))

    # The context manager closes the file even if unpickling fails.
    with open(input_file_name, 'rb') as pickle_file_handle:
        input_dict = pickle.load(pickle_file_handle)

    # Add a leading axis of length 1 to each mean matrix, so that each looks
    # like a one-example batch.
    mean_predictor_matrices = [
        numpy.expand_dims(this_matrix, axis=0)
        for this_matrix in input_dict[MEAN_PREDICTOR_MATRICES_KEY]
    ]

    # The metadata file is expected in the same directory as the model file.
    model_file_name = input_dict[MODEL_FILE_KEY]
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY][
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY
    ] = False

    # Reshape the pressures to a 1-row matrix (one row per example).
    mean_sounding_pressures_pascals = input_dict[MEAN_SOUNDING_PRESSURES_KEY]
    sounding_pressure_matrix_pascals = numpy.reshape(
        mean_sounding_pressures_pascals,
        (1, len(mean_sounding_pressures_pascals))
    )

    plot_input_examples.plot_examples(
        list_of_predictor_matrices=mean_predictor_matrices,
        model_metadata_dict=model_metadata_dict, pmm_flag=True,
        output_dir_name=output_dir_name, plot_soundings=plot_soundings,
        sounding_pressure_matrix_pascals=sounding_pressure_matrix_pascals,
        allow_whitespace=allow_whitespace, plot_panel_names=plot_panel_names,
        add_titles=add_titles, label_colour_bars=label_colour_bars,
        colour_bar_length=colour_bar_length)
def write_results(result_dict, pickle_file_name):
    """Writes results to Pickle file.

    :param result_dict: Dictionary created by `run_sfs`.
    :param pickle_file_name: Path to output file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # The context manager closes the file even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(result_dict, pickle_file_handle)
def write_model_for_each_class(model_object_by_class, pickle_file_name):
    """Writes models to Pickle file.

    :param model_object_by_class: See documentation for
        `train_model_for_each_class`.
    :param pickle_file_name: Path to output file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # The context manager closes the file even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(model_object_by_class, pickle_file_handle)
def _plot_kernel():
    """Plots convolutional kernel.

    The kernel values are drawn at random (with replacement) from
    `POSSIBLE_KERNEL_VALUES`, so each call produces a different kernel.

    J = number of rows in kernel
    K = number of columns in kernel

    :return: kernel_matrix: Kernel as J-by-K numpy array.
    """

    kernel_matrix = numpy.random.choice(
        a=POSSIBLE_KERNEL_VALUES,
        size=(NUM_ROWS_IN_KERNEL, NUM_COLUMNS_IN_KERNEL), replace=True)

    # A constant-valued matrix gives every grid cell the same background
    # colour.  The kernel values themselves are drawn as text on top.
    dummy_matrix = numpy.full(
        (NUM_ROWS_IN_KERNEL, NUM_COLUMNS_IN_KERNEL), HIGHLIGHTED_VALUE)

    _, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    pyplot.imshow(
        dummy_matrix, cmap=COLOUR_MAP_OBJECT, vmin=HIGHLIGHTED_VALUE - 1,
        vmax=HIGHLIGHTED_VALUE, axes=axes_object, origin='upper')
    pyplot.xticks([], [])
    pyplot.yticks([], [])

    # i = column index (x-coordinate), j = row index (y-coordinate).
    for i in range(kernel_matrix.shape[1]):
        for j in range(kernel_matrix.shape[0]):
            axes_object.text(
                i, j, '{0:.1f}'.format(kernel_matrix[j, i]),
                fontsize=OVERLAY_FONT_SIZE, color=MAIN_COLOUR,
                horizontalalignment='center', verticalalignment='center')

    plotting_utils.annotate_axes(
        axes_object=axes_object, annotation_string='(b)',
        font_colour=ANNOTATION_COLOUR)

    # Call form of print works under both Python 2 (single argument) and
    # Python 3, unlike the print statement.
    print('Saving figure to: "{0:s}"...'.format(KERNEL_FILE_NAME))
    file_system_utils.mkdir_recursive_if_necessary(file_name=KERNEL_FILE_NAME)
    pyplot.savefig(KERNEL_FILE_NAME, dpi=OUTPUT_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(
        input_file_name=KERNEL_FILE_NAME, output_file_name=KERNEL_FILE_NAME)

    return kernel_matrix
def _extract_single_field_to_file(grib_file_name, grib1_field_name=None,
                                  output_file_name=None,
                                  wgrib_exe_name=WGRIB_EXE_NAME_DEFAULT,
                                  wgrib2_exe_name=WGRIB2_EXE_NAME_DEFAULT,
                                  raise_error_if_fails=True):
    """Extracts single field from grib1 or grib2 file; writes to text file.

    A "single field" is one variable at one time step and all grid cells.

    The command is considered to have failed if it cannot be launched or if it
    exits with a non-zero status.  Because the command is a shell pipeline, the
    status checked is the one reported by the shell for the whole pipeline.

    :param grib_file_name: Path to input (grib1 or grib2) file.
    :param grib1_field_name: Field name in grib1 format (example: 500-mb height
        is "HGT:500 mb").
    :param output_file_name: Path to output file.
    :param wgrib_exe_name: Path to wgrib executable.
    :param wgrib2_exe_name: Path to wgrib2 executable.
    :param raise_error_if_fails: Boolean flag.  If command fails and
        raise_error_if_fails = True, will raise an error.
    :return: success: Boolean flag.  If command succeeded, this is True.  If
        command failed and raise_error_if_fails = False, this is False.
    :raises: OSError: if command fails and raise_error_if_fails = True.
    """

    grib_file_type = _get_file_type(grib_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    # Pipeline: list the inventory, keep the line matching the desired field,
    # then feed that line back to the same executable to dump the field as
    # headerless text.
    if grib_file_type == GRIB1_FILE_TYPE:
        command_string = (
            '"' + wgrib_exe_name + '" "' + grib_file_name +
            '" -s | grep -w "' + grib1_field_name + '" | "' + wgrib_exe_name +
            '" -i "' + grib_file_name + '" -text -nh -o "' + output_file_name +
            '"')
    else:
        command_string = (
            '"' + wgrib2_exe_name + '" "' + grib_file_name +
            '" -s | grep -w "' + _field_name_grib1_to_grib2(grib1_field_name) +
            '" | "' + wgrib2_exe_name + '" -i "' + grib_file_name +
            '" -no_header -text "' + output_file_name + '"')

    try:
        exit_code = subprocess.call(command_string, shell=True)

        # With shell=True, a failing command does not raise OSError; it is
        # reported only through the exit status.  Convert a non-zero status to
        # OSError so that both failure modes share the handling below.
        if exit_code != 0:
            raise OSError(
                'Command exited with non-zero status ({0:d}).'.format(
                    exit_code))
    except OSError as this_exception:
        if raise_error_if_fails:
            raise

        warn_string = (
            '\n\n' + command_string +
            '\n\nCommand (shown above) failed (details shown below).\n\n' +
            str(this_exception))
        warnings.warn(warn_string)
        return False

    return True
def write_model(model_object, pickle_file_name):
    """Writes model to Pickle file.

    :param model_object: Instance (preferably trained) of
        `xgboost.XGBClassifier`.
    :param pickle_file_name: Path to output file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # The context manager closes the file even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(model_object, pickle_file_handle)
def _run():
    """Makes schema for storm-velocity estimation.

    This is effectively the main method.  It creates the output location,
    builds the tracking data, then plots the schema to `OUTPUT_FILE_NAME`.
    """

    figure_file_name = OUTPUT_FILE_NAME

    # Make sure the destination exists before any data are created.
    file_system_utils.mkdir_recursive_if_necessary(file_name=figure_file_name)

    _plot_schema(
        storm_object_table=_create_tracking_data(),
        output_file_name=figure_file_name)