def _run(input_warning_file_name, top_tracking_dir_name, spc_date_string,
         max_distance_metres, min_lifetime_fraction, output_warning_file_name):
    """Links each NWS tornado warning to nearest storm.

    This is effectively the main method.

    :param input_warning_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param spc_date_string: Same.
    :param max_distance_metres: Same.
    :param min_lifetime_fraction: Same.
    :param output_warning_file_name: Same.
    """

    error_checking.assert_is_greater(max_distance_metres, 0.)
    error_checking.assert_is_greater(min_lifetime_fraction, 0.)
    error_checking.assert_is_leq(min_lifetime_fraction, 1.)

    print('Reading warnings from: "{0:s}"...'.format(input_warning_file_name))

    # Context manager guarantees the file handle is closed even if
    # pickle.load raises.
    with open(input_warning_file_name, 'rb') as this_file_handle:
        warning_table = pickle.load(this_file_handle)

    date_start_time_unix_sec = (
        time_conversion.get_start_of_spc_date(spc_date_string)
    )
    date_end_time_unix_sec = (
        time_conversion.get_end_of_spc_date(spc_date_string)
    )

    # Keep only warnings that begin on the given SPC date.
    warning_table = warning_table.loc[
        (warning_table[WARNING_START_TIME_KEY] >= date_start_time_unix_sec) &
        (warning_table[WARNING_START_TIME_KEY] <= date_end_time_unix_sec)
    ]

    num_warnings = len(warning_table.index)
    print('Number of warnings beginning on SPC date "{0:s}" = {1:d}'.format(
        spc_date_string, num_warnings))

    warning_polygon_objects_xy = [None] * num_warnings
    nested_array = warning_table[[
        WARNING_START_TIME_KEY, WARNING_START_TIME_KEY
    ]].values.tolist()

    warning_table = warning_table.assign(**{
        WARNING_XY_POLYGON_KEY: warning_polygon_objects_xy,
        LINKED_SECONDARY_IDS_KEY: nested_array
    })

    for k in range(num_warnings):
        warning_table[LINKED_SECONDARY_IDS_KEY].values[k] = []

        this_object_latlng = warning_table[WARNING_LATLNG_POLYGON_KEY].values[k]
        warning_table[WARNING_XY_POLYGON_KEY].values[k], _ = (
            polygons.project_latlng_to_xy(
                polygon_object_latlng=this_object_latlng,
                projection_object=PROJECTION_OBJECT)
        )

    # Gather tracking files for the SPC date itself and both adjacent dates,
    # so storms straddling a date boundary are not missed.  Missing files are
    # tolerated (raise_error_if_missing=False); the no-files case is handled
    # below.
    tracking_file_names = []

    for i in [-1, 0, 1]:
        this_spc_date_string = time_conversion.time_to_spc_date_string(
            date_start_time_unix_sec + i * NUM_SECONDS_PER_DAY)

        tracking_file_names += tracking_io.find_files_one_spc_date(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            source_name=tracking_utils.SEGMOTION_NAME,
            spc_date_string=this_spc_date_string,
            raise_error_if_missing=False)[0]

    if len(tracking_file_names) == 0:
        # No storm tracks available: write warnings with empty linkages.
        _write_linked_warnings(
            warning_table=warning_table,
            output_file_name=output_warning_file_name)
        return

    print(SEPARATOR_STRING)
    storm_object_table = tracking_io.read_many_files(tracking_file_names)
    print(SEPARATOR_STRING)

    if len(storm_object_table.index) == 0:
        # Tracking files exist but contain no storm objects.
        _write_linked_warnings(
            warning_table=warning_table,
            output_file_name=output_warning_file_name)
        return

    storm_object_table = linkage._project_storms_latlng_to_xy(
        storm_object_table=storm_object_table,
        projection_object=PROJECTION_OBJECT)

    for k in range(num_warnings):
        this_start_time_string = time_conversion.unix_sec_to_string(
            warning_table[WARNING_START_TIME_KEY].values[k],
            LOG_MESSAGE_TIME_FORMAT)
        this_end_time_string = time_conversion.unix_sec_to_string(
            warning_table[WARNING_END_TIME_KEY].values[k],
            LOG_MESSAGE_TIME_FORMAT)

        print('Attempting to link warning from {0:s} to {1:s}...'.format(
            this_start_time_string, this_end_time_string))

        # Deep copy keeps _link_one_warning from mutating the shared table.
        warning_table[LINKED_SECONDARY_IDS_KEY].values[k] = _link_one_warning(
            warning_table=warning_table.iloc[[k]],
            storm_object_table=copy.deepcopy(storm_object_table),
            max_distance_metres=max_distance_metres,
            min_lifetime_fraction=min_lifetime_fraction)

        print('\n')

    _write_linked_warnings(
        warning_table=warning_table,
        output_file_name=output_warning_file_name)
def pixel_columns_to_grid_columns(
        pixel_column_by_vertex, num_pixel_columns, num_panel_columns,
        num_grid_columns, assert_same_panel):
    """Converts pixel columns to grid columns.

    V = number of vertices in object

    :param pixel_column_by_vertex: length-V numpy array with column coordinates
        of vertices in pixel space.
    :param num_pixel_columns: Total number of pixel columns in image.
    :param num_panel_columns: Total number of panel columns in image.
    :param num_grid_columns: Total number of columns in grid (one grid per
        panel).
    :param assert_same_panel: Boolean flag.  If True, all vertices must be in
        the same panel.
    :return: grid_column_by_vertex: length-V numpy array with column coordinates
        (floats) of vertices in grid space.
    :return: panel_column_by_vertex: length-V numpy array with column
        coordinates (integers) of vertices in panel space.
    :raises: ValueError: if `assert_same_panel == True` and the object spans
        multiple panels.
    """

    error_checking.assert_is_integer(num_pixel_columns)
    error_checking.assert_is_greater(num_pixel_columns, 0)
    error_checking.assert_is_integer(num_panel_columns)
    error_checking.assert_is_greater(num_panel_columns, 0)
    error_checking.assert_is_integer(num_grid_columns)
    error_checking.assert_is_greater(num_grid_columns, 0)
    error_checking.assert_is_boolean(assert_same_panel)
    error_checking.assert_is_numpy_array(
        pixel_column_by_vertex, num_dimensions=1)

    # BUG FIX: the original did `pixel_column_by_vertex += 0.5` (and later
    # subtracted panel offsets element-wise), mutating the caller's array in
    # place.  Work on a copy instead; the +0.5 shifts coords to pixel centers.
    pixel_column_by_vertex = pixel_column_by_vertex + 0.5

    error_checking.assert_is_geq_numpy_array(pixel_column_by_vertex, 0.)
    error_checking.assert_is_leq_numpy_array(
        pixel_column_by_vertex, num_pixel_columns)

    # First pixel column of each panel.
    panel_column_to_first_px_column = {}
    for j in range(num_panel_columns):
        panel_column_to_first_px_column[j] = (
            j * float(num_pixel_columns) / num_panel_columns
        )

    panel_column_by_vertex = numpy.floor(
        pixel_column_by_vertex * float(num_panel_columns) / num_pixel_columns
    ).astype(int)

    # A vertex exactly on the right image edge belongs to the last panel.
    panel_column_by_vertex[
        panel_column_by_vertex == num_panel_columns
    ] = num_panel_columns - 1

    if assert_same_panel and len(numpy.unique(panel_column_by_vertex)) > 1:
        error_string = (
            'Object is in multiple panels.  Panel columns listed below.\n{0:s}'
        ).format(str(panel_column_by_vertex))

        raise ValueError(error_string)

    num_vertices = len(pixel_column_by_vertex)

    # Make each pixel coordinate relative to its own panel (safe: operates on
    # the local copy, not the caller's array).
    for i in range(num_vertices):
        pixel_column_by_vertex[i] = (
            pixel_column_by_vertex[i] -
            panel_column_to_first_px_column[panel_column_by_vertex[i]]
        )

    grid_column_by_vertex = -0.5 + (
        pixel_column_by_vertex * float(num_grid_columns * num_panel_columns) /
        num_pixel_columns
    )

    return grid_column_by_vertex, panel_column_by_vertex
def plot_latlng_grid(axes_object, probability_matrix, min_latitude_deg,
                     min_longitude_deg, latitude_spacing_deg,
                     longitude_spacing_deg, colour_map=None,
                     colour_minimum=None, colour_maximum=None):
    """Plots a lat-long grid of probabilities on the given axes.

    Because this is a lat-long (not x-y) grid, the basemap projection must be
    cylindrical equidistant, which is equivalent to a lat-long projection.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)

    All probabilities (`probability_matrix`, `colour_minimum`,
    `colour_maximum`) are dimensionless, in the range 0...1.

    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param probability_matrix: M-by-N numpy array of probabilities.  Latitude
        increases down each column; longitude increases rightward along each
        row.
    :param min_latitude_deg: Minimum grid-point latitude (deg N).
    :param min_longitude_deg: Minimum grid-point longitude (deg E).
    :param latitude_spacing_deg: Spacing between adjacent rows.
    :param longitude_spacing_deg: Spacing between adjacent columns.
    :param colour_map: Instance of `matplotlib.pyplot.cm`.  If None, falls back
        on `_get_default_colour_map`.
    :param colour_minimum: Minimum value for colour map.
    :param colour_maximum: Maximum value for colour map.
    """

    # Convert grid-point values to cell-edge coordinates for pcolormesh.
    edge_probability_matrix, edge_latitudes_deg, edge_longitudes_deg = (
        grids.latlng_field_grid_points_to_edges(
            field_matrix=probability_matrix,
            min_latitude_deg=min_latitude_deg,
            min_longitude_deg=min_longitude_deg,
            lat_spacing_deg=latitude_spacing_deg,
            lng_spacing_deg=longitude_spacing_deg)
    )

    # Mask NaN cells so they are not drawn.
    edge_probability_matrix = numpy.ma.masked_where(
        numpy.isnan(edge_probability_matrix), edge_probability_matrix)

    if colour_map is None:
        # Default scheme: bounds come from the default norm object.
        colour_map, colour_norm_object, _ = _get_default_colour_map()
        colour_minimum = colour_norm_object.boundaries[0]
        colour_maximum = colour_norm_object.boundaries[-1]
    else:
        colour_norm_object = None
        error_checking.assert_is_greater(colour_maximum, colour_minimum)

    pyplot.pcolormesh(
        edge_longitudes_deg, edge_latitudes_deg, edge_probability_matrix,
        cmap=colour_map, norm=colour_norm_object, vmin=colour_minimum,
        vmax=colour_maximum, shading='flat', edgecolors='None',
        axes=axes_object)
def plot_2d_feature_map(
        feature_matrix, axes_object, colour_map_object,
        font_size=DEFAULT_FONT_SIZE, colour_norm_object=None,
        min_colour_value=None, max_colour_value=None, annotation_string=None):
    """Plots one 2-D feature map on the given axes.

    M = number of rows in grid
    N = number of columns in grid

    :param feature_matrix: M-by-N numpy array of feature values (before or
        after the activation function; this method does not care which).
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param colour_map_object: Instance of `matplotlib.pyplot.cm`.
    :param font_size: Font size for annotation.
    :param colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`.
    :param min_colour_value: [used only if `colour_norm_object is None`]
        Minimum value in colour scheme.
    :param max_colour_value: [used only if `colour_norm_object is None`] Max
        value in colour scheme.
    :param annotation_string: Text printed at the bottom-center of the map.
        If None, no annotation is drawn.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=2)

    # A normalization object, when given, overrides the explicit bounds;
    # otherwise the explicit bounds must be self-consistent.
    if colour_norm_object is None:
        error_checking.assert_is_greater(max_colour_value, min_colour_value)
    elif hasattr(colour_norm_object, 'boundaries'):
        min_colour_value = colour_norm_object.boundaries[0]
        max_colour_value = colour_norm_object.boundaries[-1]
    else:
        min_colour_value = colour_norm_object.vmin
        max_colour_value = colour_norm_object.vmax

    axes_object.pcolormesh(
        feature_matrix, cmap=colour_map_object, norm=colour_norm_object,
        vmin=min_colour_value, vmax=max_colour_value, shading='flat',
        edgecolors='None')

    if annotation_string is not None:
        error_checking.assert_is_string(annotation_string)

        axes_object.text(
            0.5, 0.01, annotation_string, fontsize=font_size,
            fontweight='bold', color='black', horizontalalignment='center',
            verticalalignment='bottom', transform=axes_object.transAxes)

    # Feature maps are abstract grids; tick labels carry no meaning.
    axes_object.set_xticks([])
    axes_object.set_yticks([])
def get_echo_tops(
        unix_time_sec, spc_date_string, top_directory_name,
        critical_reflectivity_dbz,
        top_height_to_consider_m_asl=DEFAULT_TOP_INPUT_HEIGHT_FOR_ECHO_TOPS_M_ASL,
        lowest_refl_to_consider_dbz=None):
    """Finds echo top at each horizontal location.

    "Echo top" is max height with reflectivity >= critical reflectivity.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)

    :param unix_time_sec: Valid time.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_directory_name: Name of top-level directory with MYRORSS files.
    :param critical_reflectivity_dbz: Critical reflectivity (used to define
        echo top).
    :param top_height_to_consider_m_asl: Top height level to consider (metres
        above sea level).
    :param lowest_refl_to_consider_dbz: Lowest reflectivity to consider in echo
        top calculations.  If None, will consider all reflectivities.
    :return: echo_top_matrix_m_asl: M-by-N matrix of echo tops (metres above
        sea level).  Latitude increases down each column, and longitude
        increases to the right along each row.
    :return: grid_point_latitudes_deg: length-M numpy array with latitudes
        (deg N) of grid points, sorted in ascending order.
    :return: grid_point_longitudes_deg: length-N numpy array with longitudes
        (deg E) of grid points, sorted in ascending order.
    :return: metadata_dict: Dictionary created by
        `myrorss_and_mrms_io.read_metadata_from_raw_file` for column-max
        reflectivity.
    """

    error_checking.assert_is_greater(critical_reflectivity_dbz, 0.)
    error_checking.assert_is_greater(top_height_to_consider_m_asl, 0)
    top_height_to_consider_m_asl = int(
        numpy.round(top_height_to_consider_m_asl))

    if lowest_refl_to_consider_dbz is None:
        lowest_refl_to_consider_dbz = 0.
    error_checking.assert_is_less_than(
        lowest_refl_to_consider_dbz, critical_reflectivity_dbz)

    # Keep only height levels at or below the requested ceiling.
    grid_point_heights_m_asl = radar_utils.get_valid_heights(
        data_source=radar_utils.MYRORSS_SOURCE_ID,
        field_name=radar_utils.REFL_NAME)
    grid_point_heights_m_asl = grid_point_heights_m_asl[
        grid_point_heights_m_asl <= top_height_to_consider_m_asl]

    column_max_refl_file_name = myrorss_and_mrms_io.find_raw_file(
        unix_time_sec=unix_time_sec, spc_date_string=spc_date_string,
        field_name=radar_utils.REFL_COLUMN_MAX_NAME,
        data_source=radar_utils.MYRORSS_SOURCE_ID,
        top_directory_name=top_directory_name)

    num_grid_heights = len(grid_point_heights_m_asl)
    single_height_refl_file_names = [''] * num_grid_heights

    for k in range(num_grid_heights):
        single_height_refl_file_names[k] = myrorss_and_mrms_io.find_raw_file(
            unix_time_sec=unix_time_sec, spc_date_string=spc_date_string,
            field_name=radar_utils.REFL_NAME,
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            top_directory_name=top_directory_name,
            height_m_asl=grid_point_heights_m_asl[k])

    print('Reading "{0:s}" for echo-top calculation...'.format(
        column_max_refl_file_name))

    metadata_dict = myrorss_and_mrms_io.read_metadata_from_raw_file(
        column_max_refl_file_name, data_source=radar_utils.MYRORSS_SOURCE_ID)

    this_sparse_grid_table = (
        myrorss_and_mrms_io.read_data_from_sparse_grid_file(
            column_max_refl_file_name,
            field_name_orig=metadata_dict[
                myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            sentinel_values=metadata_dict[radar_utils.SENTINEL_VALUE_COLUMN]))

    (column_max_refl_matrix_dbz, grid_point_latitudes_deg,
     grid_point_longitudes_deg) = radar_s2f.sparse_to_full_grid(
         this_sparse_grid_table, metadata_dict)

    num_grid_rows = len(grid_point_latitudes_deg)
    num_grid_columns = len(grid_point_longitudes_deg)

    # Echo top is non-trivial only where column-max reflectivity reaches the
    # critical threshold.
    linear_indices_to_consider = numpy.where(
        numpy.reshape(
            column_max_refl_matrix_dbz, num_grid_rows * num_grid_columns
        ) >= critical_reflectivity_dbz
    )[0]

    print((
        'Echo-top calculation is needed at only {0:d}/{1:d} horizontal grid '
        'points!'
    ).format(len(linear_indices_to_consider),
             num_grid_rows * num_grid_columns))

    echo_top_matrix_m_asl = numpy.full(
        (num_grid_rows, num_grid_columns), numpy.nan)
    num_horiz_points_to_consider = len(linear_indices_to_consider)

    if num_horiz_points_to_consider == 0:
        # BUG FIX: the early exit used to return only the echo-top matrix,
        # while the docstring and the normal path promise a 4-tuple.  Return
        # the full tuple (matrix is all-NaN, so flipud is harmless).
        return (numpy.flipud(echo_top_matrix_m_asl),
                grid_point_latitudes_deg[::-1], grid_point_longitudes_deg,
                metadata_dict)

    grid_rows_to_consider, grid_columns_to_consider = numpy.unravel_index(
        linear_indices_to_consider, (num_grid_rows, num_grid_columns))
    reflectivity_matrix_dbz = numpy.full(
        (num_grid_heights, num_horiz_points_to_consider), numpy.nan)

    for k in range(num_grid_heights):
        print('Reading "{0:s}" for echo-top calculation...'.format(
            single_height_refl_file_names[k]))

        this_metadata_dict = myrorss_and_mrms_io.read_metadata_from_raw_file(
            single_height_refl_file_names[k],
            data_source=radar_utils.MYRORSS_SOURCE_ID)

        this_sparse_grid_table = (
            myrorss_and_mrms_io.read_data_from_sparse_grid_file(
                single_height_refl_file_names[k],
                field_name_orig=this_metadata_dict[
                    myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
                data_source=radar_utils.MYRORSS_SOURCE_ID,
                sentinel_values=this_metadata_dict[
                    radar_utils.SENTINEL_VALUE_COLUMN]))

        this_reflectivity_matrix_dbz, _, _ = radar_s2f.sparse_to_full_grid(
            this_sparse_grid_table, this_metadata_dict,
            ignore_if_below=lowest_refl_to_consider_dbz)

        # Keep only the columns where an echo top actually needs computing.
        reflectivity_matrix_dbz[k, :] = this_reflectivity_matrix_dbz[
            grid_rows_to_consider, grid_columns_to_consider]

    print('Computing echo tops at the {0:d} horizontal grid points...'.format(
        num_horiz_points_to_consider))

    for i in range(num_horiz_points_to_consider):
        echo_top_matrix_m_asl[
            grid_rows_to_consider[i], grid_columns_to_consider[i]
        ] = radar_utils.get_echo_top_single_column(
            reflectivities_dbz=reflectivity_matrix_dbz[:, i],
            heights_m_asl=grid_point_heights_m_asl,
            critical_reflectivity_dbz=critical_reflectivity_dbz)

    # Flip so latitude increases downward, matching the documented layout.
    return (numpy.flipud(echo_top_matrix_m_asl),
            grid_point_latitudes_deg[::-1], grid_point_longitudes_deg,
            metadata_dict)
def plot_colour_bar(
        axes_object_or_matrix, data_matrix, colour_map_object,
        colour_norm_object,
        orientation_string=DEFAULT_CBAR_ORIENTATION_STRING, padding=None,
        extend_min=True, extend_max=True, fraction_of_axis_length=1.,
        font_size=FONT_SIZE, aspect_ratio=20.):
    """Plots colour bar.

    :param axes_object_or_matrix: Either one axis handle (instance of
        `matplotlib.axes._subplots.AxesSubplot`) or a numpy array thereof.
    :param data_matrix: numpy array of values to which the colour map applies.
    :param colour_map_object: Colour map (instance of `matplotlib.pyplot.cm`
        or similar).
    :param colour_norm_object: Colour normalization (maps from data space to
        colour-bar space, which goes from 0...1).  Should be an instance of
        `matplotlib.colors.Normalize`.
    :param orientation_string: Orientation ("vertical" or "horizontal").
    :param padding: Padding between colour bar and main plot (in range 0...1).
        To use the default (different for vertical and horizontal colour
        bars), leave this alone.
    :param extend_min: Boolean flag.  If True, values below the minimum
        specified by `colour_norm_object` are possible, so the colour bar will
        be plotted with an arrow at the bottom.
    :param extend_max: Boolean flag.  If True, values above the max specified
        by `colour_norm_object` are possible, so the colour bar will be
        plotted with an arrow at the top.
    :param fraction_of_axis_length: The colour bar will take up this fraction
        of the axis length (x-axis if horizontal, y-axis if vertical).  May
        exceed 1 (deliberately not range-checked).
    :param font_size: Font size for tick marks on colour bar.
    :param aspect_ratio: Ratio of length to width.
    :return: colour_bar_object: Colour-bar handle (instance of
        `matplotlib.pyplot.colorbar`).
    """

    error_checking.assert_is_real_numpy_array(data_matrix)
    error_checking.assert_is_boolean(extend_min)
    error_checking.assert_is_boolean(extend_max)
    error_checking.assert_is_greater(fraction_of_axis_length, 0.)

    scalar_mappable_object = pyplot.cm.ScalarMappable(
        cmap=colour_map_object, norm=colour_norm_object)
    scalar_mappable_object.set_array(data_matrix)

    if extend_min and extend_max:
        extend_arg = 'both'
    elif extend_min:
        extend_arg = 'min'
    elif extend_max:
        extend_arg = 'max'
    else:
        extend_arg = 'neither'

    if padding is None:
        if orientation_string == 'horizontal':
            padding = HORIZONTAL_CBAR_PADDING
        else:
            padding = VERTICAL_CBAR_PADDING

    # Padding may legitimately fall outside 0...1, so only realness is
    # checked.
    error_checking.assert_is_real_number(padding)

    # pyplot.colorbar accepts either one axes object or a list of them.
    if isinstance(axes_object_or_matrix, numpy.ndarray):
        axes_arg = axes_object_or_matrix.ravel().tolist()
    else:
        axes_arg = axes_object_or_matrix

    colour_bar_object = pyplot.colorbar(
        ax=axes_arg, mappable=scalar_mappable_object,
        orientation=orientation_string, pad=padding, extend=extend_arg,
        shrink=fraction_of_axis_length, aspect=aspect_ratio)

    colour_bar_object.ax.tick_params(labelsize=font_size)

    # Rotate tick labels on horizontal colour bars so they do not overlap.
    if orientation_string == 'horizontal':
        colour_bar_object.ax.set_xticklabels(
            colour_bar_object.ax.get_xticklabels(), rotation=90)

    return colour_bar_object
def do_novelty_detection(
        list_of_baseline_input_matrices, list_of_trial_input_matrices,
        cnn_model_object, cnn_feature_layer_name, upconvnet_model_object,
        num_novel_examples, multipass=False,
        percent_svd_variance_to_keep=DEFAULT_PCT_VARIANCE_TO_KEEP):
    """Runs novelty detection.

    I = number of input tensors to the CNN
    B = number of baseline examples
    T = number of trial examples

    This method assumes that both `list_of_baseline_input_matrices` and
    `list_of_trial_input_matrices` are normalized.

    :param list_of_baseline_input_matrices: length-I list of numpy arrays,
        where the [i]th array is the [i]th input matrix to the CNN.  The first
        axis of each array must have length B.
    :param list_of_trial_input_matrices: Same, except the first axis of each
        array must have length T.
    :param cnn_model_object: Trained CNN (instance of `keras.models.Model` or
        `keras.models.Sequential`).
    :param cnn_feature_layer_name: Name of feature layer in CNN.  Outputs of
        this layer will be inputs to the upconvnet.
    :param upconvnet_model_object: Trained upconvnet (instance of
        `keras.models.Model` or `keras.models.Sequential`).
    :param num_novel_examples: Number of novel trial examples to find.  This
        method will find the N most novel trial examples, where
        N = `num_novel_examples`.
    :param multipass: Boolean flag.  If True, will run multi-pass version.  If
        False, will run single-pass version.  In the multi-pass version,
        whenever the next-most novel trial example is found, it is used to fit
        a new SVD model.  In other words, after finding the [k]th-most novel
        trial example, a new SVD model is fit on all baseline examples and the
        k most novel trial examples.
    :param percent_svd_variance_to_keep: See doc for `_fit_svd`.
    :return: novelty_dict: Dictionary with the following keys, letting
        Q = `num_novel_examples`.
    novelty_dict['list_of_baseline_input_matrices']: Same as input.
    novelty_dict['list_of_trial_input_matrices']: Same as input.
    novelty_dict['novel_indices']: length-Q numpy array with indices of novel
        examples, where novel_indices[k] is the index of the [k]th-most novel
        example.  These are indices into the first axis of each array in
        `list_of_trial_input_matrices`.
    novelty_dict['novel_image_matrix_upconv']: numpy array with upconvnet
        reconstructions of the most novel examples.  The first axis has
        length Q.
    novelty_dict['novel_image_matrix_upconv_svd']: numpy array with upconvnet
        reconstructions of SVD reconstructions of the most novel examples.
        Same dimensions as `novel_image_matrix_upconv`.
    novelty_dict['percent_svd_variance_to_keep']: Same as input.
    novelty_dict['cnn_feature_layer_name']: Same as input.
    novelty_dict['multipass']: Same as input.
    """

    baseline_feature_matrix = _apply_cnn(
        cnn_model_object=cnn_model_object,
        list_of_predictor_matrices=list_of_baseline_input_matrices,
        output_layer_name=cnn_feature_layer_name, verbose=True)
    print('\n')

    trial_feature_matrix = _apply_cnn(
        cnn_model_object=cnn_model_object,
        list_of_predictor_matrices=list_of_trial_input_matrices,
        output_layer_name=cnn_feature_layer_name, verbose=True)
    print('\n')

    num_trial_examples = trial_feature_matrix.shape[0]

    error_checking.assert_is_integer(num_novel_examples)
    error_checking.assert_is_greater(num_novel_examples, 0)
    error_checking.assert_is_leq(num_novel_examples, num_trial_examples)
    error_checking.assert_is_boolean(multipass)

    svd_dictionary = None
    novel_indices = numpy.array([], dtype=int)
    novel_image_matrix_upconv = None
    novel_image_matrix_upconv_svd = None

    for k in range(num_novel_examples):
        # Python 3 print function (was a Python 2 print statement); the stray
        # second argument to format() — which has one placeholder — is gone.
        print('Finding {0:d}th-most novel trial example...'.format(k + 1))

        # In single-pass mode the SVD model is fit only once (at k == 0).
        fit_new_svd = multipass or k == 0

        if fit_new_svd:
            # Baseline set absorbs the novel examples found so far; the trial
            # set excludes them.
            this_baseline_feature_matrix = numpy.concatenate(
                (baseline_feature_matrix,
                 trial_feature_matrix[novel_indices, ...]),
                axis=0)
            this_trial_feature_matrix = numpy.delete(
                trial_feature_matrix, obj=novel_indices, axis=0)

            svd_dictionary = _fit_svd(
                baseline_feature_matrix=this_baseline_feature_matrix,
                test_feature_matrix=this_trial_feature_matrix,
                percent_variance_to_keep=percent_svd_variance_to_keep)

        trial_svd_errors = numpy.full(num_trial_examples, numpy.nan)
        trial_feature_matrix_svd = numpy.full(
            trial_feature_matrix.shape, numpy.nan)

        for i in range(num_trial_examples):
            if i in novel_indices:
                continue

            trial_feature_matrix_svd[i, ...] = _apply_svd(
                feature_vector=trial_feature_matrix[i, ...],
                svd_dictionary=svd_dictionary)

            trial_svd_errors[i] = numpy.linalg.norm(
                trial_feature_matrix_svd[i, ...] -
                trial_feature_matrix[i, ...])

        # Most novel example = largest SVD-reconstruction error.  Previously
        # found examples are NaN, so nanargmax skips them.
        these_novel_indices = numpy.full(1, numpy.nanargmax(trial_svd_errors))
        novel_indices = numpy.concatenate((novel_indices, these_novel_indices))

        this_image_matrix_upconv = upconvnet_model_object.predict(
            trial_feature_matrix[these_novel_indices, ...], batch_size=1)
        this_image_matrix_upconv_svd = upconvnet_model_object.predict(
            trial_feature_matrix_svd[these_novel_indices, ...], batch_size=1)

        if novel_image_matrix_upconv is None:
            novel_image_matrix_upconv = this_image_matrix_upconv + 0.
            novel_image_matrix_upconv_svd = this_image_matrix_upconv_svd + 0.
        else:
            novel_image_matrix_upconv = numpy.concatenate(
                (novel_image_matrix_upconv, this_image_matrix_upconv), axis=0)
            novel_image_matrix_upconv_svd = numpy.concatenate(
                (novel_image_matrix_upconv_svd, this_image_matrix_upconv_svd),
                axis=0)

    return {
        BASELINE_INPUTS_KEY: list_of_baseline_input_matrices,
        TRIAL_INPUTS_KEY: list_of_trial_input_matrices,
        NOVEL_INDICES_KEY: novel_indices,
        NOVEL_IMAGES_UPCONV_KEY: novel_image_matrix_upconv,
        NOVEL_IMAGES_UPCONV_SVD_KEY: novel_image_matrix_upconv_svd,
        PERCENT_VARIANCE_KEY: percent_svd_variance_to_keep,
        CNN_FEATURE_LAYER_KEY: cnn_feature_layer_name,
        MULTIPASS_KEY: multipass
    }
def _read_examples(top_example_dir_name, first_time_string, last_time_string,
                   num_times, num_examples_per_time, model_metadata_dict):
    """Reads learning examples.

    These and the trained model are the main inputs to the permutation test.

    :param top_example_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param model_metadata_dict: Dictionary with metadata for trained model
        (created by `traditional_cnn.read_model_metadata`).
    :return: predictor_matrix: E-by-M-by-N-by-C numpy array of predictor values
        (images).
    :return: target_values: length-E numpy array of target values (integer
        class labels).
    """

    error_checking.assert_is_greater(num_times, 0)
    error_checking.assert_is_geq(num_examples_per_time, 10)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_target_time_unix_sec=first_time_unix_sec,
        last_target_time_unix_sec=last_time_unix_sec)

    # Randomly subset the available files to at most `num_times` files.
    num_times = min([num_times, len(example_file_names)])
    random.shuffle(example_file_names)
    example_file_names = example_file_names[:num_times]

    predictor_matrix = None
    target_matrix = None

    for i in range(num_times):
        # Python 3 print function (was a Python 2 print statement).
        print('Reading data from: "{0:s}"...'.format(example_file_names[i]))

        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=example_file_names[i],
            predictor_names_to_keep=model_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            num_half_rows_to_keep=model_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_half_columns_to_keep=model_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_num_examples_total = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY].shape[0]
        this_num_examples_to_keep = min(
            [num_examples_per_time, this_num_examples_total])

        # Randomly sample (without replacement) the examples to keep from this
        # file.
        these_example_indices = numpy.linspace(
            0, this_num_examples_total - 1, num=this_num_examples_total,
            dtype=int)
        these_example_indices = numpy.random.choice(
            these_example_indices, size=this_num_examples_to_keep,
            replace=False)

        this_predictor_matrix = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY][these_example_indices, ...]
        this_target_matrix = this_example_dict[trainval_io.TARGET_MATRIX_KEY][
            these_example_indices, ...]

        if predictor_matrix is None:
            # "+ 0" forces a copy, decoupling from the example dict.
            predictor_matrix = this_predictor_matrix + 0.
            target_matrix = this_target_matrix + 0
        else:
            predictor_matrix = numpy.concatenate(
                (predictor_matrix, this_predictor_matrix), axis=0)
            target_matrix = numpy.concatenate(
                (target_matrix, this_target_matrix), axis=0)

    num_examples_by_class = numpy.sum(target_matrix, axis=0)
    print('Number of examples in each class: {0:s}\n'.format(
        str(num_examples_by_class)))

    # Convert one-hot target matrix to integer class labels.
    return predictor_matrix, numpy.argmax(target_matrix, axis=1)
def _run(input_file_name, predictor_colour_map_name,
         min_colour_prctile_for_predictors, max_colour_prctile_for_predictors,
         saliency_colour_map_name, max_colour_prctile_for_saliency,
         saliency_contour_line_width, num_saliency_contours, output_dir_name):
    """Plots saliency maps.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param predictor_colour_map_name: Same.
    :param min_colour_prctile_for_predictors: Same.
    :param max_colour_prctile_for_predictors: Same.
    :param saliency_colour_map_name: Same.
    :param max_colour_prctile_for_saliency: Same.
    :param saliency_contour_line_width: Same.
    :param num_saliency_contours: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    error_checking.assert_is_geq(min_colour_prctile_for_predictors, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_predictors, 100.)
    error_checking.assert_is_greater(max_colour_prctile_for_predictors,
                                     min_colour_prctile_for_predictors)
    error_checking.assert_is_geq(max_colour_prctile_for_saliency, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_saliency, 100.)
    error_checking.assert_is_geq(num_saliency_contours, 2)

    # Round down to the nearest even number, then add 1, so the contour count
    # is odd (equal positive and negative contours around zero).
    num_saliency_contours = 1 + int(
        number_rounding.floor_to_nearest(num_saliency_contours, 2))

    # Integer division preserves the original Python 2 semantics of "/" on
    # ints (num_saliency_contours is odd, so this is exact anyway).
    half_num_saliency_contours = (num_saliency_contours - 1) // 2

    predictor_colour_map_object = pyplot.cm.get_cmap(predictor_colour_map_name)
    saliency_colour_map_object = pyplot.cm.get_cmap(saliency_colour_map_name)

    # Python 3 print function (was a Python 2 print statement), matching the
    # rest of the codebase.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    predictor_matrix, saliency_matrix, saliency_metadata_dict = (
        saliency_maps.read_file(input_file_name))

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=saliency_metadata_dict[
            saliency_maps.MODEL_FILE_NAME_KEY])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    num_predictors = len(narr_predictor_names)
    num_examples = predictor_matrix.shape[0]

    for i in range(num_examples):
        # Per-predictor colour bounds from the requested percentiles.
        this_min_cval_by_predictor = numpy.full(num_predictors, numpy.nan)
        this_max_cval_by_predictor = this_min_cval_by_predictor + 0.

        for k in range(num_predictors):
            this_min_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], min_colour_prctile_for_predictors)
            this_max_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], max_colour_prctile_for_predictors)

        _, these_axes_objects = (
            example_plotting.plot_many_predictors_sans_barbs(
                predictor_matrix=predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=
                [predictor_colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)
        )

        this_max_abs_contour_level = numpy.percentile(
            numpy.absolute(saliency_matrix[i, ...]),
            max_colour_prctile_for_saliency)
        this_contour_interval = (
            this_max_abs_contour_level / half_num_saliency_contours
        )

        saliency_plotting.plot_many_2d_grids(
            saliency_matrix_3d=saliency_matrix[i, ...],
            axes_objects_2d_list=these_axes_objects,
            colour_map_object=saliency_colour_map_object,
            max_absolute_contour_level=this_max_abs_contour_level,
            contour_interval=this_contour_interval,
            line_width=saliency_contour_line_width)

        this_figure_file_name = '{0:s}/example{1:06d}_saliency.jpg'.format(
            output_dir_name, i)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
def write_model_metadata(
        cnn_file_name, cnn_feature_layer_name, num_epochs,
        num_examples_per_batch, num_training_batches_per_epoch,
        training_example_file_names, first_training_time_unix_sec,
        last_training_time_unix_sec, num_validation_batches_per_epoch,
        validation_example_file_names, first_validation_time_unix_sec,
        last_validation_time_unix_sec, pickle_file_name):
    """Writes metadata to Pickle file.

    :param cnn_file_name: Path to file with trained CNN (readable by
        `cnn.read_model`).
    :param cnn_feature_layer_name: See doc for `train_upconvnet`.
    :param num_epochs: Same.
    :param num_examples_per_batch: Same.
    :param num_training_batches_per_epoch: Same.
    :param training_example_file_names: Same.
    :param first_training_time_unix_sec: Same.
    :param last_training_time_unix_sec: Same.
    :param num_validation_batches_per_epoch: Same.
    :param validation_example_file_names: Same.
    :param first_validation_time_unix_sec: Same.
    :param last_validation_time_unix_sec: Same.
    :param pickle_file_name: Path to output file.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['cnn_file_name']: See doc for `read_model_metadata`.
    metadata_dict['cnn_feature_layer_name']: Same.
    metadata_dict['num_epochs']: Same.
    metadata_dict['num_examples_per_batch']: Same.
    metadata_dict['num_training_batches_per_epoch']: Same.
    metadata_dict['training_example_file_names']: Same.
    metadata_dict['first_training_time_unix_sec']: Same.
    metadata_dict['last_training_time_unix_sec']: Same.
    metadata_dict['num_validation_batches_per_epoch']: Same.
    metadata_dict['validation_example_file_names']: Same.
    metadata_dict['first_validation_time_unix_sec']: Same.
    metadata_dict['last_validation_time_unix_sec']: Same.
    """

    # String arguments.
    error_checking.assert_is_string(cnn_file_name)
    error_checking.assert_is_string(cnn_feature_layer_name)

    # All batch/epoch counts must be positive integers.
    for this_count in [
            num_epochs, num_examples_per_batch,
            num_training_batches_per_epoch, num_validation_batches_per_epoch
    ]:
        error_checking.assert_is_integer(this_count)
        error_checking.assert_is_greater(this_count, 0)

    # Training files and time window.
    error_checking.assert_is_string_list(training_example_file_names)
    error_checking.assert_is_numpy_array(
        numpy.array(training_example_file_names), num_dimensions=1)

    error_checking.assert_is_integer(first_training_time_unix_sec)
    error_checking.assert_is_integer(last_training_time_unix_sec)
    error_checking.assert_is_greater(
        last_training_time_unix_sec, first_training_time_unix_sec)

    # Validation files and time window.
    error_checking.assert_is_string_list(validation_example_file_names)
    error_checking.assert_is_numpy_array(
        numpy.array(validation_example_file_names), num_dimensions=1)

    error_checking.assert_is_integer(first_validation_time_unix_sec)
    error_checking.assert_is_integer(last_validation_time_unix_sec)
    error_checking.assert_is_greater(
        last_validation_time_unix_sec, first_validation_time_unix_sec)

    metadata_dict = {
        CNN_FILE_KEY: cnn_file_name,
        CNN_FEATURE_LAYER_KEY: cnn_feature_layer_name,
        NUM_EPOCHS_KEY: num_epochs,
        NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_batch,
        NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        TRAINING_FILES_KEY: training_example_file_names,
        FIRST_TRAINING_TIME_KEY: first_training_time_unix_sec,
        LAST_TRAINING_TIME_KEY: last_training_time_unix_sec,
        NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        VALIDATION_FILES_KEY: validation_example_file_names,
        FIRST_VALIDATION_TIME_KEY: first_validation_time_unix_sec,
        LAST_VALIDATION_TIME_KEY: last_validation_time_unix_sec
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(metadata_dict, pickle_file_handle)

    return metadata_dict
def apply_upconvnet(
        cnn_input_matrices, cnn_model_object, cnn_feature_layer_name,
        ucn_model_object, num_examples_per_batch=1000, verbose=True):
    """Applies upconvnet to new radar images.

    :param cnn_input_matrices: 1-D list of input matrices to CNN.  Each must be
        a numpy array.
    :param cnn_model_object: Trained CNN (instance of `keras.models.Model` or
        `keras.models.Sequential`).  Will be used to convert images to feature
        vectors.
    :param cnn_feature_layer_name: Name of feature-generating layer in CNN.
        Feature vectors (inputs to upconvnet) will be outputs from this layer.
    :param ucn_model_object: Trained upconvnet (instance of
        `keras.models.Model` or `keras.models.Sequential`).  Will be used to
        convert feature vectors back to images (ideally back to original
        images).
    :param num_examples_per_batch: Number of examples per batch.
    :param verbose: Boolean flag.  If True, will print progress messages to
        command window.
    :return: reconstructed_radar_matrix: Reconstructed version of first input
        matrix to CNN.
    """

    # Partial model that maps CNN inputs to the feature layer's outputs.
    feature_generator_object = cnn.model_to_feature_generator(
        model_object=cnn_model_object,
        feature_layer_name=cnn_feature_layer_name)

    error_checking.assert_is_boolean(verbose)

    num_examples = cnn_input_matrices[0].shape[0]
    if num_examples_per_batch is None:
        num_examples_per_batch = num_examples + 0

    error_checking.assert_is_integer(num_examples_per_batch)
    error_checking.assert_is_greater(num_examples_per_batch, 0)
    num_examples_per_batch = min([num_examples_per_batch, num_examples])

    # Allocated lazily, once the output shape is known from the first batch.
    reconstructed_radar_matrix = None

    for this_first_index in range(0, num_examples, num_examples_per_batch):
        this_last_index = min([
            this_first_index + num_examples_per_batch - 1, num_examples - 1
        ])
        these_indices = numpy.arange(
            this_first_index, this_last_index + 1, dtype=int)

        if verbose:
            print((
                'Applying upconvnet to examples {0:d}-{1:d} of {2:d}...'
            ).format(
                numpy.min(these_indices) + 1,
                numpy.max(these_indices) + 1,
                num_examples
            ))

        this_feature_matrix = feature_generator_object.predict(
            [m[these_indices, ...] for m in cnn_input_matrices],
            batch_size=len(these_indices)
        )
        this_image_matrix = ucn_model_object.predict(
            this_feature_matrix, batch_size=len(these_indices)
        )

        if reconstructed_radar_matrix is None:
            these_dim = numpy.array(
                (num_examples,) + this_image_matrix.shape[1:], dtype=int
            )
            reconstructed_radar_matrix = numpy.full(these_dim, numpy.nan)

        reconstructed_radar_matrix[these_indices, ...] = this_image_matrix

    print('Have applied upconvnet to all {0:d} examples!'.format(num_examples))
    return reconstructed_radar_matrix
def create_2d_net(
        num_input_features, first_spatial_dimensions, upsampling_factors,
        num_output_channels, l1_weight=DEFAULT_L1_WEIGHT,
        l2_weight=DEFAULT_L2_WEIGHT, use_transposed_conv=True,
        activation_function_name=None, alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU, use_activn_for_last_layer=False,
        use_batch_norm=True, use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 2 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-2 numpy array of dimensions in
        first main layer.  The order should be (num_rows, num_columns).  Before
        it is passed to the first main layer, the feature vector will be
        reshaped into a grid with these dimensions.
    :param upsampling_factors: length-L numpy array of upsampling factors.
    :param num_output_channels: See doc for `create_3d_net`.
    :param l1_weight: Same.
    :param l2_weight: Same.
    :param use_transposed_conv: Same.
    :param activation_function_name: Same.
    :param alpha_for_elu: Same.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Same.
    :param use_batch_norm: Same.
    :param use_batch_norm_for_last_layer: Same.
    :return: model_object: Same.
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)
    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions, exact_dimensions=numpy.array([2], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(upsampling_factors, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(upsampling_factors)
    error_checking.assert_is_geq_numpy_array(upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    # Reshape the flat feature vector into a (rows, columns, channels) grid.
    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions, numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    num_main_layers = len(upsampling_factors)
    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            # Last layer must produce the desired number of output channels.
            current_num_filters = num_output_channels + 0
        elif upsampling_factors[i] == 1:
            # When not upsampling spatially, halve the filter count instead.
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (upsampling_factors[i], upsampling_factors[i])

        if use_transposed_conv:
            layer_object = keras.layers.Conv2DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # Older Keras versions do not accept the `interpolation`
                    # argument; fall back to nearest-neighbour upsampling.
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv2D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1), padding='same', data_format='channels_last',
                dilation_rate=(1, 1), activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        # Activation is applied to every layer except (optionally) the last.
        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        # Batch norm is applied to every layer except (optionally) the last.
        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
def create_3d_net(
        num_input_features, first_spatial_dimensions,
        rowcol_upsampling_factors, height_upsampling_factors,
        num_output_channels, l1_weight=DEFAULT_L1_WEIGHT,
        l2_weight=DEFAULT_L2_WEIGHT, use_transposed_conv=True,
        activation_function_name=None, alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU, use_activn_for_last_layer=False,
        use_batch_norm=True, use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 3 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-3 numpy array of dimensions in
        first main layer.  The order should be (num_rows, num_columns,
        num_heights).  Before it is passed to the first main layer, the feature
        vector will be reshaped into a grid with these dimensions.
    :param rowcol_upsampling_factors: length-L numpy array of upsampling
        factors for horizontal dimensions.
    :param height_upsampling_factors: length-L numpy array of upsampling
        factors for vertical dimension.
    :param num_output_channels: Number of channels in output image.
    :param l1_weight: Weight of L1 regularization for conv and transposed-conv
        layers.
    :param l2_weight: Same but for L2 regularization.
    :param use_transposed_conv: Boolean flag.  If True, each upsampling will be
        done with a transposed-conv layer.  If False, each upsampling will be
        done with an upsampling layer followed by a normal conv layer.
    :param activation_function_name: Activation function.  If you do not want
        activation, make this None.  Otherwise, must be accepted by
        `architecture_utils.check_activation_function`.
    :param alpha_for_elu: See doc for
        `architecture_utils.check_activation_function`.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Boolean flag.  If True, will apply
        activation function to output image.
    :param use_batch_norm: Boolean flag.  If True, will apply batch
        normalization to conv and transposed-conv layers.
    :param use_batch_norm_for_last_layer: Boolean flag.  If True, will apply
        batch normalization to output image.
    :return: model_object: Untrained model (instance of `keras.models.Model`).
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)
    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions, exact_dimensions=numpy.array([3], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(
        rowcol_upsampling_factors, num_dimensions=1
    )
    error_checking.assert_is_integer_numpy_array(rowcol_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(rowcol_upsampling_factors, 1)

    # Horizontal and vertical upsampling factors must have the same length.
    num_main_layers = len(rowcol_upsampling_factors)
    these_expected_dim = numpy.array([num_main_layers], dtype=int)

    error_checking.assert_is_numpy_array(
        height_upsampling_factors, exact_dimensions=these_expected_dim
    )
    error_checking.assert_is_integer_numpy_array(height_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(height_upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    # Reshape the flat feature vector into a (rows, columns, heights, channels)
    # grid.
    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions, numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            # Last layer must produce the desired number of output channels.
            current_num_filters = num_output_channels + 0
        elif rowcol_upsampling_factors[i] == 1:
            # When not upsampling horizontally, halve the filter count instead.
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (
            rowcol_upsampling_factors[i], rowcol_upsampling_factors[i],
            height_upsampling_factors[i]
        )

        if use_transposed_conv:
            layer_object = keras.layers.Conv3DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if rowcol_upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # `UpSampling3D` does not accept the `interpolation`
                    # argument (only the 2-D version does in newer Keras), so
                    # fall back to plain nearest-neighbour upsampling.
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv3D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1, 1), padding='same',
                data_format='channels_last', dilation_rate=(1, 1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        # Activation is applied to every layer except (optionally) the last.
        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        # Batch norm is applied to every layer except (optionally) the last.
        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
def _check_input_args(option_dict):
    """Error-checks input arguments.

    :param option_dict: See doc for `find_convective_pixels`.
    :return: option_dict: Same as input, except that defaults might have been
        added.
    """

    # Overlay user-supplied options on top of the defaults.
    if option_dict is None:
        user_option_dict = {}
    else:
        user_option_dict = option_dict.copy()

    option_dict = DEFAULT_OPTION_DICT.copy()
    option_dict.update(user_option_dict)

    # Coerce numeric options to their canonical types.
    for this_key in [
            PEAKEDNESS_NEIGH_KEY, MAX_PEAKEDNESS_HEIGHT_KEY,
            ECHO_TOP_LEVEL_KEY, MIN_COMPOSITE_REFL_CRITERION5_KEY,
            MIN_COMPOSITE_REFL_AML_KEY
    ]:
        option_dict[this_key] = float(option_dict[this_key])

    for this_key in [MIN_ECHO_TOP_KEY, MIN_SIZE_KEY]:
        option_dict[this_key] = int(numpy.round(option_dict[this_key]))

    error_checking.assert_is_greater(option_dict[PEAKEDNESS_NEIGH_KEY], 0.)
    error_checking.assert_is_greater(
        option_dict[MAX_PEAKEDNESS_HEIGHT_KEY], 0.)
    error_checking.assert_is_boolean(option_dict[HALVE_RESOLUTION_KEY])
    error_checking.assert_is_greater(option_dict[MIN_ECHO_TOP_KEY], 0)
    error_checking.assert_is_greater(option_dict[ECHO_TOP_LEVEL_KEY], 0.)
    error_checking.assert_is_greater(option_dict[MIN_SIZE_KEY], 1)
    error_checking.assert_is_greater(
        option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY], 0.)
    error_checking.assert_is_greater(
        option_dict[MIN_COMPOSITE_REFL_AML_KEY], 0.)

    # This option is allowed to be None (criterion disabled).
    if option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] is not None:
        option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] = float(
            option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY])
        error_checking.assert_is_greater(
            option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY], 0.)

    return option_dict
def plot_parallels(
        basemap_object, axes_object, min_latitude_deg=None,
        max_latitude_deg=None, num_parallels=DEFAULT_NUM_PARALLELS,
        font_size=FONT_SIZE, line_width=DEFAULT_GRID_LINE_WIDTH,
        line_colour=DEFAULT_GRID_LINE_COLOUR,
        z_order=DEFAULT_GRID_LINE_Z_ORDER):
    """Plots parallels (grid lines for latitude).

    If `min_latitude_deg` and `max_latitude_deg` are both None, this method
    will take plotting limits from `basemap_object`.

    :param basemap_object: See doc for `plot_countries`.
    :param axes_object: Same.
    :param min_latitude_deg: Minimum latitude for grid lines.
    :param max_latitude_deg: Max latitude for grid lines.
    :param num_parallels: Number of parallels.
    :param font_size: Font size for tick labels.
    :param line_width: See doc for `plot_countries`.
    :param line_colour: Same.
    :param z_order: Same.
    """

    # Fall back to basemap's own latitude limits if either bound is missing.
    if min_latitude_deg is None or max_latitude_deg is None:
        these_limits_deg = _basemap_to_latlng_limits(basemap_object)[0]
        min_latitude_deg = these_limits_deg[0]
        max_latitude_deg = these_limits_deg[1]

    error_checking.assert_is_valid_latitude(min_latitude_deg)
    error_checking.assert_is_valid_latitude(max_latitude_deg)
    error_checking.assert_is_greater(max_latitude_deg, min_latitude_deg)

    error_checking.assert_is_integer(num_parallels)
    error_checking.assert_is_geq(num_parallels, 2)

    # Round the spacing to a "nice" value (0.1-deg multiples below 1 deg,
    # whole degrees otherwise).
    spacing_deg = (
        (max_latitude_deg - min_latitude_deg) / (num_parallels - 1)
    )

    if spacing_deg < 1.:
        spacing_deg = number_rounding.round_to_nearest(spacing_deg, 0.1)
    else:
        spacing_deg = numpy.round(spacing_deg)

    # Snap the bounds onto the spacing grid, then recount the parallels.
    min_latitude_deg = number_rounding.ceiling_to_nearest(
        min_latitude_deg, spacing_deg)
    max_latitude_deg = number_rounding.floor_to_nearest(
        max_latitude_deg, spacing_deg)
    num_parallels = 1 + int(numpy.round(
        (max_latitude_deg - min_latitude_deg) / spacing_deg
    ))

    latitudes_deg = numpy.linspace(
        min_latitude_deg, max_latitude_deg, num=num_parallels)

    basemap_object.drawparallels(
        latitudes_deg, color=colour_from_numpy_to_tuple(line_colour),
        fontsize=font_size, linewidth=line_width,
        labels=[True, False, False, False], ax=axes_object, zorder=z_order)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, num_iterations, learning_rate,
         output_file_name):
    """Runs backwards optimization on a trained CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param output_file_name: Same.
    """

    # Non-positive `num_examples` means "use the examples in
    # `example_indices`".
    if num_examples <= 0:
        num_examples = None

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
        num_examples = len(example_indices)
    else:
        error_checking.assert_is_greater(num_examples, 0)

        # Choose `num_examples` examples at random.
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(
            example_indices, size=num_examples, replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]
    optimized_predictor_matrix = numpy.full(predictor_matrix.shape, numpy.nan)
    print(SEPARATOR_STRING)

    for i in range(num_examples):
        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print((
                'Optimizing {0:d}th of {1:d} images for target class {2:d}...'
            ).format(i + 1, num_examples, target_class))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_class(
                    model_object=model_object, target_class=target_class,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate)[0])

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print((
                'Optimizing {0:d}th of {1:d} images for neuron {2:s} in layer '
                '"{3:s}"...'
            ).format(i + 1, num_examples, str(neuron_indices), layer_name))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_neuron(
                    model_object=model_object, layer_name=layer_name,
                    neuron_indices=neuron_indices,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

        else:
            print((
                'Optimizing {0:d}th of {1:d} images for channel {2:d} in layer '
                '"{3:s}"...'
            ).format(i + 1, num_examples, channel_index, layer_name))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_channel(
                    model_object=model_object, layer_name=layer_name,
                    channel_index=channel_index,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    stat_function_for_neuron_activations=K.max,
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

    print(SEPARATOR_STRING)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    backwards_opt.write_results(
        pickle_file_name=output_file_name,
        list_of_optimized_input_matrices=[optimized_predictor_matrix],
        model_file_name=model_file_name,
        init_function_name_or_matrices=[predictor_matrix],
        num_iterations=num_iterations, learning_rate=learning_rate,
        component_type_string=component_type_string,
        target_class=target_class, layer_name=layer_name,
        neuron_indices=neuron_indices, channel_index=channel_index,
        ideal_activation=ideal_activation)
def plot_meridians(
        basemap_object, axes_object, min_longitude_deg=None,
        max_longitude_deg=None, num_meridians=DEFAULT_NUM_MERIDIANS,
        font_size=FONT_SIZE, line_width=DEFAULT_GRID_LINE_WIDTH,
        line_colour=DEFAULT_GRID_LINE_COLOUR,
        z_order=DEFAULT_GRID_LINE_Z_ORDER):
    """Plots meridians (grid lines for longitude).

    If `min_longitude_deg` and `max_longitude_deg` are both None, this method
    will take plotting limits from `basemap_object`.

    :param basemap_object: See doc for `plot_countries`.
    :param axes_object: Same.
    :param min_longitude_deg: Minimum longitude for grid lines.
    :param max_longitude_deg: Max longitude for grid lines.
    :param num_meridians: Number of meridians.
    :param font_size: Font size for tick labels.
    :param line_width: See doc for `plot_countries`.
    :param line_colour: Same.
    :param z_order: Same.
    """

    # Fall back to basemap's own longitude limits if either bound is missing.
    if min_longitude_deg is None or max_longitude_deg is None:
        these_limits_deg = _basemap_to_latlng_limits(basemap_object)[1]
        min_longitude_deg = these_limits_deg[0]
        max_longitude_deg = these_limits_deg[1]

    # Work in positive-in-west convention so the bounds are comparable.
    min_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        min_longitude_deg)
    max_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        max_longitude_deg)

    error_checking.assert_is_greater(max_longitude_deg, min_longitude_deg)
    error_checking.assert_is_integer(num_meridians)
    error_checking.assert_is_geq(num_meridians, 2)

    # Round the spacing to a "nice" value (0.1-deg multiples below 1 deg,
    # whole degrees otherwise).
    spacing_deg = (
        (max_longitude_deg - min_longitude_deg) / (num_meridians - 1)
    )

    if spacing_deg < 1.:
        spacing_deg = number_rounding.round_to_nearest(spacing_deg, 0.1)
    else:
        spacing_deg = numpy.round(spacing_deg)

    # Snap the bounds onto the spacing grid, then recount the meridians.
    min_longitude_deg = number_rounding.ceiling_to_nearest(
        min_longitude_deg, spacing_deg)
    max_longitude_deg = number_rounding.floor_to_nearest(
        max_longitude_deg, spacing_deg)
    num_meridians = 1 + int(numpy.round(
        (max_longitude_deg - min_longitude_deg) / spacing_deg
    ))

    longitudes_deg = numpy.linspace(
        min_longitude_deg, max_longitude_deg, num=num_meridians)

    basemap_object.drawmeridians(
        longitudes_deg, color=colour_from_numpy_to_tuple(line_colour),
        fontsize=font_size, linewidth=line_width,
        labels=[False, False, False, True], ax=axes_object, zorder=z_order)
def get_uniform_colours_in_hsv_space(
        num_colours, colour_to_exclude_rgb=None,
        min_rgb_distance_from_colour=DEFAULT_MIN_RGB_DISTANCE_FROM_COLOUR):
    """Returns array of uniformly spaced colours in HSV space.

    N = number of colours

    :param num_colours: Number of colours.
    :param colour_to_exclude_rgb: This colour (and similar colours) will not be
        returned.  If None, no colours will be excluded.
    :param min_rgb_distance_from_colour: No colour within this Euclidean RGB
        distance of `colour_to_exclude_rgb` will be returned.
    :return: rgb_matrix: N-by-3 numpy array, where each row is an RGB colour.
        All values range from 0...1.
    """

    if colour_to_exclude_rgb is not None:
        error_checking.assert_is_numpy_array(
            colour_to_exclude_rgb, exact_dimensions=numpy.array([3]))
        error_checking.assert_is_geq_numpy_array(colour_to_exclude_rgb, 0.)
        error_checking.assert_is_leq_numpy_array(colour_to_exclude_rgb, 1.)

        error_checking.assert_is_greater(min_rgb_distance_from_colour, 0.)
        error_checking.assert_is_leq(min_rgb_distance_from_colour, 1.)

    # Oversample (10x) so that enough colours survive the exclusion filter.
    orig_num_colours = num_colours
    num_colours = 10 * num_colours

    num_hsv_values = (
        NUM_H_FOR_HSV_SPACE * NUM_S_FOR_HSV_SPACE * NUM_V_FOR_HSV_SPACE)
    linear_indices = numpy.linspace(
        0., float(num_hsv_values - 1), num=num_colours)
    linear_indices = numpy.round(linear_indices).astype(int)

    hsv_matrix = numpy.full((num_colours, 3), numpy.nan)

    # Pass the shape positionally: the old `dims` keyword was removed from
    # numpy.unravel_index (replaced by `shape`).
    hsv_matrix[:, 0], hsv_matrix[:, 1], hsv_matrix[:, 2] = numpy.unravel_index(
        linear_indices,
        (NUM_H_FOR_HSV_SPACE, NUM_S_FOR_HSV_SPACE, NUM_V_FOR_HSV_SPACE))

    # Normalize indices to the range 0...1 (assumes 256 values per HSV axis --
    # TODO confirm against NUM_*_FOR_HSV_SPACE constants).
    hsv_matrix = hsv_matrix / 255

    rgb_matrix = skimage.color.hsv2rgb(
        numpy.reshape(hsv_matrix, (1, num_colours, 3)))
    rgb_matrix = numpy.reshape(rgb_matrix, (num_colours, 3))

    if colour_to_exclude_rgb is not None:
        # `range` objects cannot be shuffled in place on Python 3; use
        # numpy.random.permutation to visit candidate colours in random order.
        good_indices = []
        all_indices = numpy.random.permutation(num_colours)

        for i in all_indices:
            this_distance = numpy.linalg.norm(
                rgb_matrix[i, :] - colour_to_exclude_rgb)
            if this_distance < min_rgb_distance_from_colour:
                continue

            good_indices.append(i)
            if len(good_indices) == orig_num_colours:
                break

        good_indices = numpy.array(good_indices, dtype=int)
        rgb_matrix = rgb_matrix[good_indices, :]

    num_colours = orig_num_colours

    # Return exactly N colours (the old slice `0:(num_colours + 1)` returned
    # N + 1 colours when no colour was excluded).
    return rgb_matrix[:num_colours, :]
def _run(input_file_name, colour_map_name, min_unguided_value,
         max_unguided_value, max_guided_value, num_unguided_contours,
         half_num_guided_contours, smoothing_radius_grid_cells,
         allow_whitespace, plot_panel_names, add_titles, label_colour_bars,
         colour_bar_length, top_output_dir_name):
    """Plots Grad-CAM output (guided and unguided class-activation maps).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param colour_map_name: Same.
    :param min_unguided_value: Same.
    :param max_unguided_value: Same.
    :param max_guided_value: Same.
    :param num_unguided_contours: Same.
    :param half_num_guided_contours: Same.
    :param smoothing_radius_grid_cells: Same.
    :param allow_whitespace: Same.
    :param plot_panel_names: Same.
    :param add_titles: Same.
    :param label_colour_bars: Same.
    :param colour_bar_length: Same.
    :param top_output_dir_name: Same.
    """

    # A non-positive smoothing radius is the command-line convention for
    # "no smoothing".
    if smoothing_radius_grid_cells <= 0:
        smoothing_radius_grid_cells = None

    # Unguided and guided maps go to separate subdirectories.
    unguided_cam_dir_name = '{0:s}/main_gradcam'.format(top_output_dir_name)
    guided_cam_dir_name = '{0:s}/guided_gradcam'.format(top_output_dir_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=unguided_cam_dir_name
    )
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=guided_cam_dir_name
    )

    # Check input args.
    colour_map_object = pyplot.get_cmap(colour_map_name)
    error_checking.assert_is_greater(min_unguided_value, 0.)
    error_checking.assert_is_greater(max_unguided_value, min_unguided_value)
    error_checking.assert_is_greater(max_guided_value, 0.)
    error_checking.assert_is_geq(num_unguided_contours, 10)
    error_checking.assert_is_geq(half_num_guided_contours, 5)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    gradcam_dict, pmm_flag = gradcam.read_file(input_file_name)

    if pmm_flag:
        # Probability-matched mean: file holds one composite example.  Add a
        # leading example axis so the per-example loop below can index both
        # cases the same way.
        predictor_matrices = gradcam_dict.pop(
            gradcam.MEAN_PREDICTOR_MATRICES_KEY
        )
        cam_matrices = gradcam_dict.pop(gradcam.MEAN_CAM_MATRICES_KEY)
        guided_cam_matrices = gradcam_dict.pop(
            gradcam.MEAN_GUIDED_CAM_MATRICES_KEY
        )

        # The PMM composite has no storm ID or valid time.
        full_storm_id_strings = [None]
        storm_times_unix_sec = [None]

        for j in range(len(predictor_matrices)):
            predictor_matrices[j] = numpy.expand_dims(
                predictor_matrices[j], axis=0
            )

            # A None entry means no CAM was computed for this input matrix
            # (e.g., the sounding matrix).
            if cam_matrices[j] is None:
                continue

            cam_matrices[j] = numpy.expand_dims(
                cam_matrices[j], axis=0
            )
            guided_cam_matrices[j] = numpy.expand_dims(
                guided_cam_matrices[j], axis=0
            )
    else:
        predictor_matrices = gradcam_dict.pop(gradcam.PREDICTOR_MATRICES_KEY)
        cam_matrices = gradcam_dict.pop(gradcam.CAM_MATRICES_KEY)
        guided_cam_matrices = gradcam_dict.pop(gradcam.GUIDED_CAM_MATRICES_KEY)

        full_storm_id_strings = gradcam_dict[gradcam.FULL_STORM_IDS_KEY]
        storm_times_unix_sec = gradcam_dict[gradcam.STORM_TIMES_KEY]

    if smoothing_radius_grid_cells is not None:
        cam_matrices, guided_cam_matrices = _smooth_maps(
            cam_matrices=cam_matrices,
            guided_cam_matrices=guided_cam_matrices,
            smoothing_radius_grid_cells=smoothing_radius_grid_cells
        )

    # Read metadata for CNN.
    model_file_name = gradcam_dict[gradcam.MODEL_FILE_KEY]
    # Metadata file lives next to the model file by convention.
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    print('Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    print(SEPARATOR_STRING)

    num_examples = predictor_matrices[0].shape[0]
    num_matrices = len(predictor_matrices)

    for i in range(num_examples):
        # First pass: plot predictors with UNGUIDED class-activation maps.
        this_handle_dict = plot_examples.plot_one_example(
            list_of_predictor_matrices=predictor_matrices,
            model_metadata_dict=model_metadata_dict, pmm_flag=pmm_flag,
            example_index=i, plot_sounding=False,
            allow_whitespace=allow_whitespace,
            plot_panel_names=plot_panel_names, add_titles=add_titles,
            label_colour_bars=label_colour_bars,
            colour_bar_length=colour_bar_length
        )

        these_figure_objects = this_handle_dict[plot_examples.RADAR_FIGURES_KEY]
        these_axes_object_matrices = (
            this_handle_dict[plot_examples.RADAR_AXES_KEY]
        )

        for j in range(num_matrices):
            if cam_matrices[j] is None:
                continue

            # Two leading non-spatial dims: example and channel.
            this_num_spatial_dim = len(predictor_matrices[j].shape) - 2

            if this_num_spatial_dim == 3:
                _plot_3d_radar_cam(
                    colour_map_object=colour_map_object,
                    min_unguided_value=min_unguided_value,
                    max_unguided_value=max_unguided_value,
                    num_unguided_contours=num_unguided_contours,
                    max_guided_value=max_guided_value,
                    half_num_guided_contours=half_num_guided_contours,
                    label_colour_bars=label_colour_bars,
                    colour_bar_length=colour_bar_length,
                    figure_objects=these_figure_objects,
                    axes_object_matrices=these_axes_object_matrices,
                    model_metadata_dict=model_metadata_dict,
                    output_dir_name=unguided_cam_dir_name,
                    cam_matrix=cam_matrices[j][i, ...],
                    full_storm_id_string=full_storm_id_strings[i],
                    storm_time_unix_sec=storm_times_unix_sec[i]
                )
            else:
                _plot_2d_radar_cam(
                    colour_map_object=colour_map_object,
                    min_unguided_value=min_unguided_value,
                    max_unguided_value=max_unguided_value,
                    num_unguided_contours=num_unguided_contours,
                    max_guided_value=max_guided_value,
                    half_num_guided_contours=half_num_guided_contours,
                    label_colour_bars=label_colour_bars,
                    colour_bar_length=colour_bar_length,
                    figure_objects=these_figure_objects,
                    axes_object_matrices=these_axes_object_matrices,
                    model_metadata_dict=model_metadata_dict,
                    output_dir_name=unguided_cam_dir_name,
                    cam_matrix=cam_matrices[j][i, ...],
                    full_storm_id_string=full_storm_id_strings[i],
                    storm_time_unix_sec=storm_times_unix_sec[i]
                )

        # Second pass: re-plot predictors from scratch (the first figures were
        # consumed/saved by the plotting helpers) and overlay GUIDED maps.
        this_handle_dict = plot_examples.plot_one_example(
            list_of_predictor_matrices=predictor_matrices,
            model_metadata_dict=model_metadata_dict, pmm_flag=pmm_flag,
            example_index=i, plot_sounding=False,
            allow_whitespace=allow_whitespace,
            plot_panel_names=plot_panel_names, add_titles=add_titles,
            label_colour_bars=label_colour_bars,
            colour_bar_length=colour_bar_length
        )

        these_figure_objects = this_handle_dict[plot_examples.RADAR_FIGURES_KEY]
        these_axes_object_matrices = (
            this_handle_dict[plot_examples.RADAR_AXES_KEY]
        )

        for j in range(num_matrices):
            if guided_cam_matrices[j] is None:
                continue

            this_num_spatial_dim = len(predictor_matrices[j].shape) - 2

            if this_num_spatial_dim == 3:
                _plot_3d_radar_cam(
                    colour_map_object=colour_map_object,
                    min_unguided_value=min_unguided_value,
                    max_unguided_value=max_unguided_value,
                    num_unguided_contours=num_unguided_contours,
                    max_guided_value=max_guided_value,
                    half_num_guided_contours=half_num_guided_contours,
                    label_colour_bars=label_colour_bars,
                    colour_bar_length=colour_bar_length,
                    figure_objects=these_figure_objects,
                    axes_object_matrices=these_axes_object_matrices,
                    model_metadata_dict=model_metadata_dict,
                    output_dir_name=guided_cam_dir_name,
                    guided_cam_matrix=guided_cam_matrices[j][i, ...],
                    full_storm_id_string=full_storm_id_strings[i],
                    storm_time_unix_sec=storm_times_unix_sec[i]
                )
            else:
                _plot_2d_radar_cam(
                    colour_map_object=colour_map_object,
                    min_unguided_value=min_unguided_value,
                    max_unguided_value=max_unguided_value,
                    num_unguided_contours=num_unguided_contours,
                    max_guided_value=max_guided_value,
                    half_num_guided_contours=half_num_guided_contours,
                    label_colour_bars=label_colour_bars,
                    colour_bar_length=colour_bar_length,
                    figure_objects=these_figure_objects,
                    axes_object_matrices=these_axes_object_matrices,
                    model_metadata_dict=model_metadata_dict,
                    output_dir_name=guided_cam_dir_name,
                    guided_cam_matrix=guided_cam_matrices[j][i, ...],
                    full_storm_id_string=full_storm_id_strings[i],
                    storm_time_unix_sec=storm_times_unix_sec[i]
                )
def get_contingency_table_extremes(
        storm_activations, storm_target_values, num_hits, num_misses,
        num_false_alarms, num_correct_nulls):
    """Returns "contingency-table extremes".

    Specifically, this method returns the following:

    - best hits (positive examples with the highest activations)
    - worst misses (positive examples with the lowest activations)
    - worst false alarms (negative examples with the highest activations)
    - best correct nulls (negative examples with the lowest activations)

    DEFINITIONS

    One "example" is one storm object.
    A "negative example" is a storm object with target = 0.
    A "positive example" is a storm object with target = 1.
    The target variable must be binary.

    E = number of examples

    :param storm_activations: length-E numpy array of model activations.
    :param storm_target_values: length-E numpy array of target values.  These
        must be integers from 0...1.
    :param num_hits: Number of best hits.
    :param num_misses: Number of worst misses.
    :param num_false_alarms: Number of worst false alarms.
    :param num_correct_nulls: Number of best correct nulls.
    :return: ct_extreme_dict: Dictionary with the following keys.
    ct_extreme_dict['hit_indices']: 1-D numpy array with indices of best hits.
    ct_extreme_dict['miss_indices']: 1-D numpy array with indices of worst
        misses.
    ct_extreme_dict['false_alarm_indices']: 1-D numpy array with indices of
        worst false alarms.
    ct_extreme_dict['correct_null_indices']: 1-D numpy array with indices of
        best correct nulls.
    """

    error_checking.assert_is_numpy_array(storm_activations, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(storm_target_values)
    error_checking.assert_is_geq_numpy_array(storm_target_values, 0)
    error_checking.assert_is_leq_numpy_array(storm_target_values, 1)

    num_storm_objects = len(storm_activations)
    error_checking.assert_is_numpy_array(
        storm_target_values, exact_dimensions=numpy.array([num_storm_objects]))

    error_checking.assert_is_integer(num_hits)
    error_checking.assert_is_geq(num_hits, 0)
    error_checking.assert_is_integer(num_misses)
    error_checking.assert_is_geq(num_misses, 0)
    error_checking.assert_is_integer(num_false_alarms)
    error_checking.assert_is_geq(num_false_alarms, 0)
    error_checking.assert_is_integer(num_correct_nulls)
    error_checking.assert_is_geq(num_correct_nulls, 0)
    error_checking.assert_is_greater(
        num_hits + num_misses + num_false_alarms + num_correct_nulls, 0)

    def _select_extremes(candidate_indices, num_desired, take_highest):
        """Selects candidates with the highest or lowest activations.

        :param candidate_indices: 1-D numpy array of candidate example indices.
        :param num_desired: Max number of indices to return.
        :param take_highest: Boolean.  True for highest activations, False for
            lowest.
        :return: 1-D numpy array with at most `num_desired` indices.
        """
        num_desired = min([num_desired, len(candidate_indices)])
        if num_desired <= 0:
            return numpy.array([], dtype=int)

        sort_order = numpy.argsort(storm_activations[candidate_indices])
        if take_highest:
            sort_order = sort_order[::-1]

        return candidate_indices[sort_order][:num_desired]

    positive_indices = numpy.where(storm_target_values == 1)[0]
    negative_indices = numpy.where(storm_target_values == 0)[0]

    return {
        HIT_INDICES_KEY: _select_extremes(positive_indices, num_hits, True),
        MISS_INDICES_KEY: _select_extremes(positive_indices, num_misses, False),
        FALSE_ALARM_INDICES_KEY: _select_extremes(
            negative_indices, num_false_alarms, True),
        CORRECT_NULL_INDICES_KEY: _select_extremes(
            negative_indices, num_correct_nulls, False)
    }
def _fit_svd(baseline_feature_matrix, test_feature_matrix,
             percent_variance_to_keep):
    """Fits SVD (singular-value decomposition) model.

    B = number of baseline examples (storm objects)
    T = number of testing examples (storm objects)
    Z = number of scalar features (produced by dense layer of a CNN)
    K = number of modes (top eigenvectors) retained

    The SVD model will be fit only to the baseline set, but both the baseline
    and testing sets will be used to compute normalization parameters (means
    and standard deviations).  Before, when only the baseline set was used to
    compute normalization params, the testing set had huge standard deviations,
    which caused the results of novelty detection to be physically unrealistic.

    :param baseline_feature_matrix: B-by-Z numpy array of features.
    :param test_feature_matrix: T-by-Z numpy array of features.
    :param percent_variance_to_keep: Percentage of variance to keep.
        Determines how many eigenvectors (K in the above discussion) will be
        used in the SVD model.

    :return: svd_dictionary: Dictionary with the following keys.
    svd_dictionary['eof_matrix']: Z-by-K numpy array, where each column is an
        EOF (empirical orthogonal function).
    svd_dictionary['feature_means']: length-Z numpy array with mean value of
        each feature (before transformation).
    svd_dictionary['feature_standard_deviations']: length-Z numpy array with
        standard deviation of each feature (before transformation).
    """

    error_checking.assert_is_greater(percent_variance_to_keep, 0.)
    error_checking.assert_is_leq(percent_variance_to_keep, 100.)

    # Normalize over baseline + testing sets together (see docstring for why).
    combined_feature_matrix = numpy.concatenate(
        (baseline_feature_matrix, test_feature_matrix), axis=0)
    combined_feature_matrix, feature_means, feature_standard_deviations = (
        _normalize_features(feature_matrix=combined_feature_matrix))

    num_features = baseline_feature_matrix.shape[1]
    num_baseline_examples = baseline_feature_matrix.shape[0]
    baseline_feature_matrix = combined_feature_matrix[
        :num_baseline_examples, ...]

    # `numpy.linalg.svd` returns (u, s, vh); [1:] keeps singular values and
    # right singular vectors.  Squared singular values are the eigenvalues.
    eigenvalues, eof_matrix = numpy.linalg.svd(baseline_feature_matrix)[1:]
    eigenvalues = eigenvalues ** 2

    explained_variances = eigenvalues / numpy.sum(eigenvalues)
    cumulative_explained_variances = numpy.cumsum(explained_variances)

    fraction_of_variance_to_keep = 0.01 * percent_variance_to_keep
    these_indices = numpy.where(
        cumulative_explained_variances >= fraction_of_variance_to_keep)[0]

    # If no prefix of modes reaches the threshold (possible with float
    # round-off), keep all modes.
    if len(these_indices) == 0:
        these_indices = numpy.array([num_features - 1], dtype=int)
    num_modes_to_keep = 1 + these_indices[0]

    # BUG FIX: `.format` was previously called on the return value of `print`
    # (None), raising AttributeError at runtime.
    print('Number of modes required to explain {0:f}% of variance: {1:d}'.format(
        percent_variance_to_keep, num_modes_to_keep
    ))

    return {
        EOF_MATRIX_KEY: numpy.transpose(eof_matrix)[..., :num_modes_to_keep],
        FEATURE_MEANS_KEY: feature_means,
        FEATURE_STDEVS_KEY: feature_standard_deviations
    }
def read_field_from_grib_file(
        grib_file_name, field_name_grib1, num_grid_rows, num_grid_columns,
        sentinel_value=None, temporary_dir_name=None,
        wgrib_exe_name=WGRIB_EXE_NAME_DEFAULT,
        wgrib2_exe_name=WGRIB2_EXE_NAME_DEFAULT, raise_error_if_fails=True):
    """Reads field from grib file.

    One field = one variable at one time step.

    M = number of rows (unique y-coordinates or latitudes of grid points)
    N = number of columns (unique x-coordinates or longitudes of grid points)

    :param grib_file_name: Path to input file.
    :param field_name_grib1: Field name in grib1 format (example: 500-mb height
        is "HGT:500 mb").
    :param num_grid_rows: Number of rows expected in grid.
    :param num_grid_columns: Number of columns expected in grid.
    :param sentinel_value: Sentinel value (all instances will be replaced with
        NaN).
    :param temporary_dir_name: Name of temporary directory.  An intermediate
        text file will be stored here.
    :param wgrib_exe_name: Path to wgrib executable.
    :param wgrib2_exe_name: Path to wgrib2 executable.
    :param raise_error_if_fails: Boolean flag.  If the extraction fails and
        raise_error_if_fails = True, this method will error out.  If the
        extraction fails and raise_error_if_fails = False, this method will
        return None.
    :return: field_matrix: M-by-N numpy array with values of the given field.
        If the grid is regular in x-y coordinates, x increases towards the
        right (in the positive direction of the second axis), while y
        increases downward (in the positive direction of the first axis).  If
        the grid is regular in lat-long, replace "x" and "y" in the previous
        sentence with "long" and "lat," respectively.
    :raises: ValueError: if extraction fails and raise_error_if_fails = True.
    """

    # Error-checking.
    error_checking.assert_is_string(field_name_grib1)
    error_checking.assert_is_integer(num_grid_rows)
    error_checking.assert_is_greater(num_grid_rows, 0)
    error_checking.assert_is_integer(num_grid_columns)
    error_checking.assert_is_greater(num_grid_columns, 0)
    error_checking.assert_file_exists(wgrib_exe_name)
    error_checking.assert_file_exists(wgrib2_exe_name)
    error_checking.assert_is_boolean(raise_error_if_fails)
    if sentinel_value is not None:
        error_checking.assert_is_not_nan(sentinel_value)

    # Housekeeping.
    grib_file_type = file_name_to_type(grib_file_name)

    if temporary_dir_name is not None:
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=temporary_dir_name)
    # Only the unique name is used; the file itself is (re)written by wgrib,
    # and `delete=False` keeps the name valid after the handle closes.
    temporary_file_name = tempfile.NamedTemporaryFile(
        dir=temporary_dir_name, delete=False).name

    # Extract field to temporary file.  The wgrib "-s | grep | wgrib -i"
    # pipeline selects the one record matching the field name and dumps it as
    # headerless text.
    # NOTE(review): `shell=True` with interpolated paths/field names is
    # shell-injection-prone; safe only if inputs are trusted (they come from
    # local config here).
    if grib_file_type == GRIB1_FILE_TYPE:
        command_string = (
            '"{0:s}" "{1:s}" -s | grep -w "{2:s}" | "{0:s}" -i "{1:s}" '
            '-text -nh -o "{3:s}"').format(wgrib_exe_name, grib_file_name,
                                           field_name_grib1,
                                           temporary_file_name)
    else:
        command_string = (
            '"{0:s}" "{1:s}" -s | grep -w "{2:s}" | "{0:s}" -i "{1:s}" '
            '-no_header -text "{3:s}"').format(
                wgrib2_exe_name, grib_file_name,
                _field_name_grib1_to_grib2(field_name_grib1),
                temporary_file_name)

    try:
        subprocess.call(command_string, shell=True)
    except OSError as this_exception:
        # Clean up the temp file, then either re-raise or warn and bail out,
        # depending on `raise_error_if_fails`.
        os.remove(temporary_file_name)

        if raise_error_if_fails:
            raise

        warning_string = (
            '\n\n{0:s}\n\nCommand (shown above) failed (details shown below).'
            '\n\n{1:s}').format(command_string, str(this_exception))
        warnings.warn(warning_string)
        return None

    # Read field from temporary file.
    field_vector = numpy.loadtxt(temporary_file_name)
    os.remove(temporary_file_name)

    try:
        # Reshape fails (ValueError) if the extracted record does not have
        # exactly num_grid_rows * num_grid_columns values.
        field_matrix = numpy.reshape(field_vector,
                                     (num_grid_rows, num_grid_columns))
    except ValueError as this_exception:
        if raise_error_if_fails:
            raise

        warning_string = (
            '\n\nnumpy.reshape failed (details shown below).\n\n{0:s}').format(
                str(this_exception))
        warnings.warn(warning_string)
        return None

    return _sentinel_value_to_nan(data_matrix=field_matrix,
                                  sentinel_value=sentinel_value)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, output_file_name):
    """Creates saliency map for each example, based on the same CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param output_file_name: Same.
    """

    # Non-positive num_examples is the command-line convention for "use
    # example_indices instead of a random sample".
    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    # BUG FIX: this function used Python 2 print statements (and
    # `print(...).format(...)`, which calls .format on None); converted to
    # Python 3 print calls, consistent with the rest of the file.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is not None:
        # Draw a random subset of examples without replacement.
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(
            example_indices, size=num_examples, replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]

    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for target class {0:d}...'.format(
            target_class))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_class_activation(
                model_object=model_object, target_class=target_class,
                list_of_input_matrices=[predictor_matrix])[0])

    elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for neuron {0:s} in layer '
              '"{1:s}"...'.format(str(neuron_indices), layer_name))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_neuron_activation(
                model_object=model_object, layer_name=layer_name,
                neuron_indices=neuron_indices,
                list_of_input_matrices=[predictor_matrix],
                ideal_activation=ideal_activation)[0])

    else:
        print('Computing saliency maps for channel {0:d} in layer '
              '"{1:s}"...'.format(channel_index, layer_name))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_channel_activation(
                model_object=model_object, layer_name=layer_name,
                channel_index=channel_index,
                list_of_input_matrices=[predictor_matrix],
                stat_function_for_neuron_activations=K.max,
                ideal_activation=ideal_activation)[0])

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    ge_saliency_maps.write_file(
        pickle_file_name=output_file_name,
        normalized_predictor_matrix=predictor_matrix,
        saliency_matrix=saliency_matrix,
        model_file_name=model_file_name,
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        ideal_activation=ideal_activation,
        neuron_indices=neuron_indices,
        channel_index=channel_index)
def buffer_simple_polygon(
        vertex_x_metres, vertex_y_metres, max_buffer_dist_metres,
        min_buffer_dist_metres=numpy.nan, preserve_angles=False):
    """Creates buffer around simple polygon.

    V_0 = number of original vertices
    V = number of final vertices

    :param vertex_x_metres: numpy array (length V_0) with x-coordinates of
        original vertices.
    :param vertex_y_metres: numpy array (length V_0) with y-coordinates of
        original vertices.
    :param max_buffer_dist_metres: Max buffer distance.
    :param min_buffer_dist_metres: Minimum buffer distance.  If NaN, the buffer
        is inclusive (it contains the original polygon).  Otherwise, the buffer
        is exclusive, covering only the annulus between
        `min_buffer_dist_metres` and `max_buffer_dist_metres` outside the
        original polygon.
    :param preserve_angles: Boolean flag.  If True, vertex angles of the
        original polygon are preserved (so buffer distances are approximate).
        If False, buffer distances are preserved (so vertex angles are
        approximate).  Keep this False except in unit tests.
    :return: buffered_polygon_object: `shapely.geometry.Polygon` object.
    """

    _check_vertex_arrays(vertex_x_metres, vertex_y_metres, allow_nan=False)
    error_checking.assert_is_geq(min_buffer_dist_metres, 0., allow_nan=True)
    error_checking.assert_is_not_nan(max_buffer_dist_metres)

    exclusive_buffer = not numpy.isnan(min_buffer_dist_metres)
    if exclusive_buffer:
        error_checking.assert_is_greater(max_buffer_dist_metres,
                                         min_buffer_dist_metres)

    error_checking.assert_is_boolean(preserve_angles)

    # Mitred joins preserve vertex angles; rounded joins preserve distances.
    this_join_style = (
        shapely.geometry.JOIN_STYLE.mitre if preserve_angles
        else shapely.geometry.JOIN_STYLE.round
    )

    original_polygon_object = vertex_arrays_to_polygon_object(
        vertex_x_metres, vertex_y_metres)
    outer_polygon_object = original_polygon_object.buffer(
        max_buffer_dist_metres, join_style=this_join_style)

    # Inclusive buffer: just return the outer polygon.
    if not exclusive_buffer:
        return outer_polygon_object

    # Exclusive buffer: cut the inner polygon out of the outer one as a hole.
    inner_polygon_object = original_polygon_object.buffer(
        min_buffer_dist_metres, join_style=this_join_style)

    inner_vertex_dict = polygon_object_to_vertex_arrays(inner_polygon_object)
    outer_vertex_dict = polygon_object_to_vertex_arrays(outer_polygon_object)

    return vertex_arrays_to_polygon_object(
        outer_vertex_dict[EXTERIOR_X_COLUMN],
        outer_vertex_dict[EXTERIOR_Y_COLUMN],
        hole_x_coords_list=[inner_vertex_dict[EXTERIOR_X_COLUMN]],
        hole_y_coords_list=[inner_vertex_dict[EXTERIOR_Y_COLUMN]])
def _write_prediction_subset(
        prediction_dict, desired_indices, output_file_name, message_string):
    """Writes one subset of predictions to a NetCDF file.

    Helper extracted from `_run`, which previously repeated this
    subset-and-write stanza five times.

    :param prediction_dict: Dictionary in the format returned by
        `prediction_io.read_file`.
    :param desired_indices: 1-D numpy array with indices of examples to write.
    :param output_file_name: Path to output file.
    :param message_string: Description printed before writing (file name is
        appended).
    """

    this_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=desired_indices)

    print('{0:s}: "{1:s}"...'.format(message_string, output_file_name))

    prediction_io.write_file(
        netcdf_file_name=output_file_name,
        scalar_target_matrix=this_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=this_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=this_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=this_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])


def _run(input_prediction_file_name, average_over_height, scale_by_climo,
         num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param average_over_height: Same.
    :param scale_by_climo: Same.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    # TODO(thunderhoser): Maybe allow specific height again (e.g., 15 km).
    error_checking.assert_is_greater(num_examples_per_set, 0)

    # Climo-scaling makes no sense for height-averaged errors.
    scale_by_climo = scale_by_climo and not average_over_height

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    hr_index = (vector_target_names.index(
        example_utils.SHORTWAVE_HEATING_RATE_NAME))

    target_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][..., hr_index])
    prediction_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][..., hr_index])

    bias_matrix = prediction_matrix_k_day01 - target_matrix_k_day01
    absolute_error_matrix = numpy.absolute(bias_matrix)

    if average_over_height:
        # keepdims so the matrices stay 2-D for downstream code.
        bias_matrix = numpy.mean(bias_matrix, axis=1, keepdims=True)
        absolute_error_matrix = numpy.mean(absolute_error_matrix, axis=1,
                                           keepdims=True)

    if scale_by_climo:
        # Normalize errors by the climatological (training-mean) heating rate
        # at each height.
        normalization_file_name = (
            generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])

        print(('Reading training examples (for climatology) from: "{0:s}"...'
               ).format(normalization_file_name))
        training_example_dict = example_io.read_file(normalization_file_name)
        training_example_dict = example_utils.subset_by_field(
            example_dict=training_example_dict,
            field_names=[example_utils.SHORTWAVE_HEATING_RATE_NAME])
        training_example_dict = example_utils.subset_by_height(
            example_dict=training_example_dict,
            heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY])

        dummy_example_dict = {
            example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
            example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
            example_utils.SCALAR_TARGET_NAMES_KEY: [],
            example_utils.VECTOR_TARGET_NAMES_KEY:
                [example_utils.SHORTWAVE_HEATING_RATE_NAME],
            example_utils.HEIGHTS_KEY:
                generator_option_dict[neural_net.HEIGHTS_KEY]
        }

        mean_training_example_dict = normalization.create_mean_example(
            new_example_dict=dummy_example_dict,
            training_example_dict=training_example_dict)
        climo_matrix_k_day01 = mean_training_example_dict[
            example_utils.VECTOR_TARGET_VALS_KEY][..., 0]

        bias_matrix = bias_matrix / climo_matrix_k_day01
        absolute_error_matrix = absolute_error_matrix / climo_matrix_k_day01

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=absolute_error_matrix,
            num_examples_per_set=num_examples_per_set))
    print(SEPARATOR_STRING)

    _write_prediction_subset(
        prediction_dict=prediction_dict, desired_indices=high_bias_indices,
        output_file_name=(
            '{0:s}/predictions_high-bias.nc'.format(output_dir_name)),
        message_string='Writing examples with greatest positive bias to')

    _write_prediction_subset(
        prediction_dict=prediction_dict, desired_indices=low_bias_indices,
        output_file_name=(
            '{0:s}/predictions_low-bias.nc'.format(output_dir_name)),
        message_string='Writing examples with greatest negative bias to')

    _write_prediction_subset(
        prediction_dict=prediction_dict,
        desired_indices=low_abs_error_indices,
        output_file_name=(
            '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name)),
        message_string='Writing examples with smallest absolute error to')

    # Climo-scaled errors have no natural "largest heating rate" subsets.
    if scale_by_climo:
        return

    if average_over_height:
        mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * mean_targets_k_day01)
    else:
        max_targets_k_day01 = numpy.max(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * max_targets_k_day01)

    _write_prediction_subset(
        prediction_dict=prediction_dict,
        desired_indices=sort_indices[:num_examples_per_set],
        output_file_name=(
            '{0:s}/predictions_large-heating-rate.nc'.format(output_dir_name)),
        message_string='Writing examples with greatest heating rate to')

    # Smallest-heating-rate subset is written only in the height-averaged case
    # (preserving the original control flow).
    if not average_over_height:
        return

    mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
    sort_indices = numpy.argsort(mean_targets_k_day01)

    _write_prediction_subset(
        prediction_dict=prediction_dict,
        desired_indices=sort_indices[:num_examples_per_set],
        output_file_name=(
            '{0:s}/predictions_small-heating-rate.nc'.format(output_dir_name)),
        message_string='Writing examples with smallest heating rate to')
def write_storm_to_winds_table(storm_to_winds_table, pickle_file_names):
    """Writes linkages (storm-to-wind associations) to one or more Pickle files.

    N = number of output files (should equal number of input files to
        _read_storm_tracks).
    K = number of wind observations linked to a given storm cell

    :param storm_to_winds_table: pandas DataFrame with the following mandatory
        columns.  May also contain distance buffers created by
        `storm_tracking_io.make_buffers_around_polygons`.  Each row is one storm
        object.
    storm_to_winds_table.storm_id: String ID for storm cell.
    storm_to_winds_table.unix_time_sec: Valid time.
    storm_to_winds_table.tracking_start_time_unix_sec: Start time for tracking
        period.
    storm_to_winds_table.tracking_end_time_unix_sec: End time for tracking
        period.
    storm_to_winds_table.centroid_lat_deg: Latitude at centroid of storm object
        (deg N).
    storm_to_winds_table.centroid_lng_deg: Longitude at centroid of storm object
        (deg E).
    storm_to_winds_table.polygon_object_latlng: Instance of
        `shapely.geometry.Polygon`, with vertices in lat-long coordinates.
    storm_to_winds_table.wind_station_ids: length-K list with string IDs of wind
        stations.
    storm_to_winds_table.wind_latitudes_deg: length-K numpy array with latitudes
        (deg N) of wind stations.
    storm_to_winds_table.wind_longitudes_deg: length-K numpy array with
        longitudes (deg E) of wind stations.
    storm_to_winds_table.u_winds_m_s01: length-K numpy array with u-components
        (metres per second) of wind velocities.
    storm_to_winds_table.v_winds_m_s01: length-K numpy array with v-components
        (metres per second) of wind velocities.
    storm_to_winds_table.wind_distances_metres: length-K numpy array with
        distances of wind observations from storm object.
    storm_to_winds_table.relative_wind_times_sec: length-K numpy array with
        relative times of wind observations (wind time minus storm-object time).
    :param pickle_file_names: length-N list of paths to output files.
    """

    error_checking.assert_is_string_list(pickle_file_names)
    error_checking.assert_is_numpy_array(
        numpy.asarray(pickle_file_names), num_dimensions=1)

    # Every file index referenced by the table must map to an output file.
    max_file_index = numpy.max(storm_to_winds_table[FILE_INDEX_COLUMN].values)
    num_files = len(pickle_file_names)
    error_checking.assert_is_greater(num_files, max_file_index)

    columns_to_write = get_columns_to_write(storm_to_winds_table)

    for i in range(num_files):
        # BUG FIX: `.format` was previously called on the return value of
        # print() (which is None), raising AttributeError at runtime.  The
        # format call now happens inside print().
        print((
            'Writing linkages (storm-to-wind associations) to file '
            '{0:d}/{1:d}: "{2:s}"...'
        ).format(i + 1, num_files, pickle_file_names[i]))

        file_system_utils.mkdir_recursive_if_necessary(
            file_name=pickle_file_names[i])

        # Write only the rows assigned to the i-th file.
        this_table = storm_to_winds_table.loc[
            storm_to_winds_table[FILE_INDEX_COLUMN] == i][columns_to_write]

        # Context manager guarantees the handle is closed even if dump fails.
        with open(pickle_file_names[i], 'wb') as this_file_handle:
            pickle.dump(this_table, this_file_handle)
def polygons_to_mask(polygon_objects_grid_coords, num_grid_rows,
                     num_grid_columns, num_panel_rows, num_panel_columns,
                     panel_row_by_polygon, panel_column_by_polygon):
    """Converts list of polygons to one binary mask per panel.

    M = number of rows in grid
    N = number of columns in grid
    J = number of panel rows in image
    K = number of panel columns in image

    :param polygon_objects_grid_coords: See doc for
        `polygons_from_pixel_to_grid_coords`.
    :param num_grid_rows: Same.
    :param num_grid_columns: Same.
    :param num_panel_rows: Same.
    :param num_panel_columns: Same.
    :param panel_row_by_polygon: Same.
    :param panel_column_by_polygon: Same.
    :return: mask_matrix: J-by-K-by-M-by-N numpy array of Boolean flags.  If
        mask_matrix[j, k, m, n] == True, grid point [m, n] in panel [j, k] is
        in/on at least one of the polygons.
    """

    error_checking.assert_is_integer(num_grid_rows)
    error_checking.assert_is_greater(num_grid_rows, 0)
    error_checking.assert_is_integer(num_grid_columns)
    error_checking.assert_is_greater(num_grid_columns, 0)

    _check_polygons(
        polygon_objects_grid_coords=polygon_objects_grid_coords,
        num_panel_rows=num_panel_rows, num_panel_columns=num_panel_columns,
        panel_row_by_polygon=panel_row_by_polygon,
        panel_column_by_polygon=panel_column_by_polygon)

    # Start with an all-False mask; panels without polygons stay untouched.
    mask_matrix = numpy.full(
        (num_panel_rows, num_panel_columns, num_grid_rows, num_grid_columns),
        False, dtype=bool)

    num_polygons = len(polygon_objects_grid_coords)
    if num_polygons == 0:
        return mask_matrix

    # Find the unique (panel row, panel column) pairs that contain polygons,
    # so each panel's sub-mask is computed exactly once.
    unique_panel_coord_matrix = numpy.unique(
        numpy.column_stack(
            (panel_row_by_polygon, panel_column_by_polygon)
        ).astype(int),
        axis=0)

    for this_panel_row, this_panel_column in unique_panel_coord_matrix:
        these_polygon_indices = numpy.where(numpy.logical_and(
            panel_row_by_polygon == this_panel_row,
            panel_column_by_polygon == this_panel_column
        ))[0]

        these_polygon_objects = [
            polygon_objects_grid_coords[j] for j in these_polygon_indices
        ]

        mask_matrix[this_panel_row, this_panel_column, ...] = (
            _polygons_to_mask_one_panel(
                polygon_objects_grid_coords=these_polygon_objects,
                num_grid_rows=num_grid_rows,
                num_grid_columns=num_grid_columns)
        )

    return mask_matrix
def create_map_with_nwp_proj(model_name, grid_name=None, latlng_limit_dict=None,
                             xy_limit_dict=None,
                             figure_width_inches=DEFAULT_FIGURE_WIDTH_INCHES,
                             figure_height_inches=DEFAULT_FIGURE_HEIGHT_INCHES,
                             resolution_string=DEFAULT_RESOLUTION_STRING):
    """Initializes map with the same projection as the given NWP model.

    The resulting map has false easting = false northing = 0 metres.

    If `latlng_limit_dict is not None`, map corners are set from lat-long
    coords.  If `xy_limit_dict is not None`, map corners are set from x-y
    coords.  If both are None, map corners default to the x-y corners of the
    model grid.

    :param model_name: See doc for `nwp_model_utils.check_grid_name`.
    :param grid_name: See doc for `nwp_model_utils.check_grid_name`.
    :param latlng_limit_dict: Dictionary with the following keys:
    latlng_limit_dict['min_latitude_deg']: Minimum latitude (deg N) in map.
    latlng_limit_dict['max_latitude_deg']: Max latitude (deg N) in map.
    latlng_limit_dict['min_longitude_deg']: Minimum longitude (deg E) in map.
    latlng_limit_dict['max_longitude_deg']: Max longitude (deg E) in map.
    :param xy_limit_dict: Dictionary with the following keys:
    xy_limit_dict['x_min_metres']: Minimum x-coord in map.
    xy_limit_dict['x_max_metres']: Max x-coord in map.
    xy_limit_dict['y_min_metres']: Minimum y-coord in map.
    xy_limit_dict['y_max_metres']: Max y-coord in map.
    :param figure_width_inches: Figure width.
    :param figure_height_inches: Figure height.
    :param resolution_string: See doc for `create_lambert_conformal_map`.
    :return: figure_object: Same.
    :return: axes_object: Same.
    :return: basemap_object: Same.
    """

    nwp_model_utils.check_grid_name(model_name=model_name, grid_name=grid_name)

    standard_latitudes_deg, central_longitude_deg = (
        nwp_model_utils.get_projection_params(model_name))

    if latlng_limit_dict is None and xy_limit_dict is None:
        # Default to the full model grid, shifted so that false easting and
        # false northing are both zero.
        grid_x_coords_metres, grid_y_coords_metres = (
            nwp_model_utils.get_xy_grid_cell_edges(
                model_name=model_name, grid_name=grid_name))

        this_false_easting_metres, this_false_northing_metres = (
            nwp_model_utils.get_false_easting_and_northing(
                model_name=model_name, grid_name=grid_name))

        grid_x_coords_metres = grid_x_coords_metres - this_false_easting_metres
        grid_y_coords_metres = grid_y_coords_metres - this_false_northing_metres

        xy_limit_dict = {
            X_MIN_KEY: numpy.min(grid_x_coords_metres),
            X_MAX_KEY: numpy.max(grid_x_coords_metres),
            Y_MIN_KEY: numpy.min(grid_y_coords_metres),
            Y_MAX_KEY: numpy.max(grid_y_coords_metres)
        }

    figure_object, axes_object = pyplot.subplots(
        1, 1, figsize=(figure_width_inches, figure_height_inches))

    # Projection settings shared by both corner-specification branches.
    common_basemap_kwargs = {
        'projection': 'lcc',
        'lat_1': standard_latitudes_deg[0],
        'lat_2': standard_latitudes_deg[1],
        'lon_0': central_longitude_deg,
        'rsphere': EARTH_RADIUS_METRES,
        'ellps': ELLIPSOID_NAME,
        'resolution': resolution_string
    }

    if latlng_limit_dict is not None:
        min_latitude_deg = latlng_limit_dict[MIN_LATITUDE_KEY]
        max_latitude_deg = latlng_limit_dict[MAX_LATITUDE_KEY]

        error_checking.assert_is_valid_lat_numpy_array(
            numpy.array([min_latitude_deg, max_latitude_deg]))

        min_longitude_deg = lng_conversion.convert_lng_positive_in_west(
            latlng_limit_dict[MIN_LONGITUDE_KEY])
        max_longitude_deg = lng_conversion.convert_lng_positive_in_west(
            latlng_limit_dict[MAX_LONGITUDE_KEY])

        error_checking.assert_is_greater(max_latitude_deg, min_latitude_deg)
        error_checking.assert_is_greater(max_longitude_deg, min_longitude_deg)

        basemap_object = Basemap(
            llcrnrlat=min_latitude_deg, llcrnrlon=min_longitude_deg,
            urcrnrlat=max_latitude_deg, urcrnrlon=max_longitude_deg,
            **common_basemap_kwargs)
    else:
        x_min_metres = xy_limit_dict[X_MIN_KEY]
        x_max_metres = xy_limit_dict[X_MAX_KEY]
        y_min_metres = xy_limit_dict[Y_MIN_KEY]
        y_max_metres = xy_limit_dict[Y_MAX_KEY]

        error_checking.assert_is_greater(x_max_metres, x_min_metres)
        error_checking.assert_is_greater(y_max_metres, y_min_metres)

        basemap_object = Basemap(
            llcrnrx=x_min_metres, urcrnrx=x_max_metres,
            llcrnry=y_min_metres, urcrnry=y_max_metres,
            **common_basemap_kwargs)

    return figure_object, axes_object, basemap_object
def plot_wind_barbs(basemap_object=None, axes_object=None, latitudes_deg=None,
                    longitudes_deg=None, u_winds_m_s01=None, v_winds_m_s01=None,
                    barb_length=DEFAULT_BARB_LENGTH,
                    empty_barb_radius=DEFAULT_EMPTY_BARB_RADIUS,
                    fill_empty_barb=FILL_EMPTY_BARB_DEFAULT,
                    colour_map=DEFAULT_COLOUR_MAP,
                    colour_minimum_kt=DEFAULT_COLOUR_MINIMUM_KT,
                    colour_maximum_kt=DEFAULT_COLOUR_MAXIMUM_KT):
    """Plots one wind barb per observation, coloured by speed.

    N = number of wind barbs

    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap`.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param latitudes_deg: length-N numpy array of latitudes (deg N).
    :param longitudes_deg: length-N numpy array of longitudes (deg E).
    :param u_winds_m_s01: length-N numpy array of eastward wind velocities
        (m/s).
    :param v_winds_m_s01: length-N numpy array of northward wind velocities
        (m/s).
    :param barb_length: Length of each wind barb.
    :param empty_barb_radius: Radius of circle for 0-metre-per-second wind barb.
    :param fill_empty_barb: Boolean flag.  If True, the 0-metre-per-second wind
        barb is a filled circle; otherwise it is an empty circle.
    :param colour_map: Instance of `matplotlib.pyplot.cm`.
    :param colour_minimum_kt: Minimum speed for colour map (kt).
    :param colour_maximum_kt: Maximum speed for colour map (kt).
    """

    error_checking.assert_is_valid_lat_numpy_array(latitudes_deg)
    error_checking.assert_is_numpy_array(latitudes_deg, num_dimensions=1)

    num_barbs = len(latitudes_deg)
    these_expected_dim = numpy.array([num_barbs])

    longitudes_deg = lng_conversion.convert_lng_positive_in_west(longitudes_deg)
    error_checking.assert_is_numpy_array(
        longitudes_deg, exact_dimensions=these_expected_dim)

    # Both wind components must be NaN-free and match the number of barbs.
    for this_wind_array in [u_winds_m_s01, v_winds_m_s01]:
        error_checking.assert_is_numpy_array_without_nan(this_wind_array)
        error_checking.assert_is_numpy_array(
            this_wind_array, exact_dimensions=these_expected_dim)

    error_checking.assert_is_geq(colour_minimum_kt, 0.)
    error_checking.assert_is_greater(colour_maximum_kt, colour_minimum_kt)

    # Project lat-long coords onto the basemap's x-y plane.
    x_coords_metres, y_coords_metres = basemap_object(
        longitudes_deg, latitudes_deg)

    wind_speeds_m_s01 = numpy.sqrt(u_winds_m_s01 ** 2 + v_winds_m_s01 ** 2)

    axes_object.barbs(
        x_coords_metres, y_coords_metres,
        u_winds_m_s01 * METRES_PER_SECOND_TO_KT,
        v_winds_m_s01 * METRES_PER_SECOND_TO_KT,
        wind_speeds_m_s01 * METRES_PER_SECOND_TO_KT,
        length=barb_length, sizes={'emptybarb': empty_barb_radius},
        fill_empty=fill_empty_barb, rounding=False, cmap=colour_map,
        clim=numpy.array([colour_minimum_kt, colour_maximum_kt]))
def apply_2d3d_cnn(
        model_object, reflectivity_matrix_dbz, azimuthal_shear_matrix_s01,
        sounding_matrix=None, num_examples_per_batch=100, verbose=False,
        return_features=False, feature_layer_name=None):
    """Applies CNN to both 2-D and 3-D radar images (and possibly soundings).

    M = number of rows in each reflectivity image
    N = number of columns in each reflectivity image

    :param model_object: Trained instance of `keras.models.Model` or
        `keras.models.Sequential`.
    :param reflectivity_matrix_dbz: numpy array (E x M x N x H_r x 1) of
        storm-centered reflectivity images.
    :param azimuthal_shear_matrix_s01: numpy array (E x 2M x 2N x C) of
        storm-centered azimuthal-shear images.
    :param sounding_matrix: See doc for `apply_2d_or_3d_cnn`.
    :param num_examples_per_batch: Same.
    :param verbose: Same.
    :param return_features: Same.
    :param feature_layer_name: Same.

    If return_features = True...

    :return: feature_matrix: See doc for `apply_2d_or_3d_cnn`.

    If return_features = False...

    :return: class_probability_matrix: See doc for `apply_2d_or_3d_cnn`.
    """

    dl_utils.check_radar_images(
        radar_image_matrix=reflectivity_matrix_dbz,
        min_num_dimensions=5, max_num_dimensions=5)
    dl_utils.check_radar_images(
        radar_image_matrix=azimuthal_shear_matrix_s01,
        min_num_dimensions=4, max_num_dimensions=4)

    if sounding_matrix is not None:
        # Soundings must line up with both radar inputs along the example axis.
        for this_num_examples in [reflectivity_matrix_dbz.shape[0],
                                  azimuthal_shear_matrix_s01.shape[0]]:
            dl_utils.check_soundings(
                sounding_matrix=sounding_matrix,
                num_examples=this_num_examples)

    num_examples = reflectivity_matrix_dbz.shape[0]

    if num_examples_per_batch is None:
        num_examples_per_batch = num_examples + 0
    else:
        error_checking.assert_is_integer(num_examples_per_batch)
        error_checking.assert_is_greater(num_examples_per_batch, 0)
        num_examples_per_batch = min([num_examples_per_batch, num_examples])

    error_checking.assert_is_boolean(verbose)
    error_checking.assert_is_boolean(return_features)

    # When features are requested, predict from an intermediate layer instead
    # of the model's final output.
    if return_features:
        model_object_to_use = model_to_feature_generator(
            model_object=model_object, feature_layer_name=feature_layer_name)
    else:
        model_object_to_use = model_object

    output_matrix = None

    for this_first_index in range(0, num_examples, num_examples_per_batch):
        this_last_index = min([
            this_first_index + num_examples_per_batch - 1, num_examples - 1
        ])

        if verbose:
            print((
                'Applying model to examples {0:d}-{1:d} of {2:d}...'
            ).format(this_first_index + 1, this_last_index + 1, num_examples))

        these_indices = numpy.arange(
            this_first_index, this_last_index + 1, dtype=int)

        these_predictor_matrices = [
            reflectivity_matrix_dbz[these_indices, ...],
            azimuthal_shear_matrix_s01[these_indices, ...]
        ]
        if sounding_matrix is not None:
            these_predictor_matrices.append(sounding_matrix[these_indices, ...])

        these_outputs = model_object_to_use.predict(
            these_predictor_matrices, batch_size=len(these_indices))

        # "+ 0." forces a float copy on the first batch, as in the original.
        if output_matrix is None:
            output_matrix = these_outputs + 0.
        else:
            output_matrix = numpy.concatenate(
                (output_matrix, these_outputs), axis=0)

    if verbose:
        print('Have applied model to all {0:d} examples!'.format(num_examples))

    if return_features:
        return output_matrix

    return _binary_probabilities_to_matrix(output_matrix)