def check_metadata(layer_name, neuron_indices, ideal_activation,
                   num_iterations, learning_rate, l2_weight):
    """Checks metadata for errors.

    :param layer_name: Name of layer with relevant neuron.
    :param neuron_indices: 1-D numpy array with indices of relevant neuron.
        Must have length D - 1, where D = number of dimensions in layer
        output.  The first dimension is the batch dimension, which always has
        length `None` in Keras.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (neuron_activation - ideal_activation)**2.
    :param num_iterations: Number of iterations for gradient descent.
    :param learning_rate: Learning rate for gradient descent.
    :param l2_weight: L2 weight (penalty for difference between initial and
        final predictor matrix) in loss function.
    """

    error_checking.assert_is_string(layer_name)
    error_checking.assert_is_integer_numpy_array(neuron_indices)
    error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
    error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)
    error_checking.assert_is_not_nan(ideal_activation)

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)
    error_checking.assert_is_geq(l2_weight, 0.)

def _check_input_args(num_iterations, learning_rate, l2_weight=None,
                      radar_constraint_weight=None,
                      minmax_constraint_weight=None, ideal_activation=None):
    """Error-checks input args for backwards optimization.

    :param num_iterations: See doc for `_do_gradient_descent`.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param radar_constraint_weight: Weight used to multiply part of loss
        function with radar constraints (see doc for
        `_radar_constraints_to_loss_fn`).
    :param minmax_constraint_weight: Weight used to multiply part of loss
        function with min-max constraints (see doc for
        `_minmax_constraints_to_loss_fn`).
    :param ideal_activation: See doc for
        `optimize_input_for_neuron_activation` or
        `optimize_input_for_channel_activation`.
    """

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)

    if l2_weight is not None:
        error_checking.assert_is_greater(l2_weight, 0.)

    if radar_constraint_weight is not None:
        error_checking.assert_is_greater(radar_constraint_weight, 0.)

    if minmax_constraint_weight is not None:
        error_checking.assert_is_greater(minmax_constraint_weight, 0.)

    if ideal_activation is not None:
        error_checking.assert_is_greater(ideal_activation, 0.)

def get_saliency_maps_for_class_activation(model_object, target_class,
                                           list_of_input_matrices):
    """For each input example, creates saliency map for prob of target class.

    :param model_object: Instance of `keras.models.Model`.
    :param target_class: Saliency maps will be created for this class.  Must
        be an integer in 0...(K - 1), where K = number of classes.
    :param list_of_input_matrices: See doc for `_do_saliency_calculations`.
    :return: list_of_saliency_matrices: See doc for
        `_do_saliency_calculations`.
    """

    check_metadata(
        component_type_string=model_interpretation.CLASS_COMPONENT_TYPE_STRING,
        target_class=target_class)

    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    if num_output_neurons == 1:
        error_checking.assert_is_leq(target_class, 1)

        if target_class == 1:
            loss_tensor = K.mean(
                (model_object.layers[-1].output[..., 0] - 1) ** 2)
        else:
            loss_tensor = K.mean(model_object.layers[-1].output[..., 0] ** 2)
    else:
        error_checking.assert_is_less_than(target_class, num_output_neurons)
        loss_tensor = K.mean(
            (model_object.layers[-1].output[..., target_class] - 1) ** 2)

    return _do_saliency_calculations(
        model_object=model_object, loss_tensor=loss_tensor,
        list_of_input_matrices=list_of_input_matrices)

def filter_svd_by_explained_variance(
        svd_dictionary,
        fraction_of_variance_to_keep=DEFAULT_FRACTION_OF_VARIANCE_TO_KEEP):
    """Filters SVD results by explained variance.

    :param svd_dictionary: Dictionary returned by `perform_svd`.
    :param fraction_of_variance_to_keep: Fraction of variance to keep.  Will
        select modes in descending order until they explain >=
        `fraction_of_variance_to_keep` of total variance in dataset.
    :return: svd_dictionary: Same as input, except that arrays may be
        shorter.
    """

    error_checking.assert_is_greater(fraction_of_variance_to_keep, 0.)
    error_checking.assert_is_less_than(fraction_of_variance_to_keep, 1.)

    eigenvalue_by_mode = numpy.diag(svd_dictionary[EIGENVALUE_MATRIX_KEY])
    explained_variance_by_mode = (
        eigenvalue_by_mode / numpy.sum(eigenvalue_by_mode)
    )
    cumul_explained_variance_by_mode = numpy.cumsum(
        explained_variance_by_mode)

    num_modes_to_keep = 1 + numpy.where(
        cumul_explained_variance_by_mode >= fraction_of_variance_to_keep
    )[0][0]

    svd_dictionary[PC_MATRIX_KEY] = (
        svd_dictionary[PC_MATRIX_KEY][:, :num_modes_to_keep]
    )
    svd_dictionary[EIGENVALUE_MATRIX_KEY] = (
        svd_dictionary[EIGENVALUE_MATRIX_KEY]
        [:num_modes_to_keep, :num_modes_to_keep]
    )
    svd_dictionary[EOF_MATRIX_KEY] = (
        svd_dictionary[EOF_MATRIX_KEY][:, :num_modes_to_keep]
    )

    return svd_dictionary

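# A minimal, self-contained sketch of the mode-selection rule above, using
# synthetic eigenvalues instead of the output of `perform_svd`.  The function
# name and numbers here are hypothetical, for illustration only.
def _demo_variance_filtering():
    import numpy

    eigenvalues = numpy.array([5., 3., 1.5, 0.4, 0.1])
    explained_variance_by_mode = eigenvalues / numpy.sum(eigenvalues)
    cumulative_variance = numpy.cumsum(explained_variance_by_mode)

    # Keep the leading modes that together explain >= 90% of the variance.
    num_modes_to_keep = 1 + numpy.where(cumulative_variance >= 0.9)[0][0]

    # With the eigenvalues above, cumulative_variance = [0.5, 0.8, 0.95,
    # 0.99, 1.0], so three modes are kept.
    assert num_modes_to_keep == 3
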
def _check_region_dict(list_of_cam_matrices, region_dict, pmm_flag):
    """Error-checks dictionary with regions of interest.

    :param list_of_cam_matrices: See doc for `_check_in_and_out_matrices`.
    :param region_dict: Dictionary with the following keys.
    region_dict['list_of_mask_matrices']: Same as `list_of_cam_matrices`,
        except that all numpy arrays are Boolean.  Grid cells marked True
        are in a region of interest.
    region_dict['list_of_polygon_objects']: Triple-nested list of polygons
        (instances of `shapely.geometry.Polygon`), demarcating regions of
        interest.  list_of_polygon_objects[j][i][k] is the [k]th region of
        interest for the [i]th example and [j]th input matrix.
    region_dict['percentile_threshold']: Percentile threshold used to create
        regions of interest.  This is applied separately to each
        class-activation matrix for each example.
    region_dict['min_class_activation']: Minimum class activation used to
        create regions of interest.  For a grid cell to be in a region of
        interest, it must meet both this threshold and the percentile
        threshold.

    :param pmm_flag: Boolean flag.  If True, inputs should contain PMM
        (probability-matched mean) composites.
    """

    error_checking.assert_is_geq(region_dict[PERCENTILE_THRESHOLD_KEY], 50.)
    error_checking.assert_is_less_than(
        region_dict[PERCENTILE_THRESHOLD_KEY], 100.
    )
    error_checking.assert_is_greater(
        region_dict[MIN_CLASS_ACTIVATION_KEY], 0.)

    list_of_mask_matrices = region_dict[MASK_MATRICES_KEY]
    list_of_polygon_objects = region_dict[POLYGON_OBJECTS_KEY]

    num_input_matrices = len(list_of_cam_matrices)
    assert len(list_of_mask_matrices) == num_input_matrices
    assert len(list_of_polygon_objects) == num_input_matrices

    for j in range(num_input_matrices):
        if list_of_cam_matrices[j] is None:
            assert list_of_mask_matrices[j] is None
            continue

        error_checking.assert_is_boolean_numpy_array(
            list_of_mask_matrices[j])

        these_expected_dim = numpy.array(
            list_of_cam_matrices[j].shape, dtype=int
        )
        error_checking.assert_is_numpy_array(
            list_of_mask_matrices[j], exact_dimensions=these_expected_dim
        )

        if pmm_flag:
            num_examples = 1
        else:
            num_examples = list_of_cam_matrices[j].shape[0]

        assert len(list_of_polygon_objects[j]) == num_examples

        for i in range(num_examples):
            error_checking.assert_is_list(list_of_polygon_objects[j][i])

def _eval_sfs_stopping_criterion(
        min_loss_decrease, min_percentage_loss_decrease,
        num_steps_for_loss_decrease, lowest_cost_by_step):
    """Evaluates stopping criterion for sequential forward selection (SFS).

    :param min_loss_decrease: If the loss has decreased by less than
        `min_loss_decrease` over the last `num_steps_for_loss_decrease`
        steps, the algorithm will stop.
    :param min_percentage_loss_decrease:
        [used only if `min_loss_decrease is None`]
        If the loss has decreased by less than
        `min_percentage_loss_decrease` over the last
        `num_steps_for_loss_decrease` steps, the algorithm will stop.
    :param num_steps_for_loss_decrease: See above.
    :param lowest_cost_by_step: 1-D numpy array, where the [i]th value is
        the cost after the [i]th step.  The last step is the current one, so
        the current cost is lowest_cost_by_step[-1].
    :return: stopping_criterion: Boolean flag.
    :raises: ValueError: if both `min_loss_decrease` and
        `min_percentage_loss_decrease` are None.
    """

    if min_loss_decrease is None and min_percentage_loss_decrease is None:
        raise ValueError('Either min_loss_decrease or '
                         'min_percentage_loss_decrease must be specified.')

    if min_loss_decrease is None:
        error_checking.assert_is_greater(min_percentage_loss_decrease, 0.)
        error_checking.assert_is_less_than(
            min_percentage_loss_decrease, 100.)
    else:
        min_percentage_loss_decrease = None
        error_checking.assert_is_greater(min_loss_decrease, 0.)

    error_checking.assert_is_integer(num_steps_for_loss_decrease)
    error_checking.assert_is_greater(num_steps_for_loss_decrease, 0)

    if len(lowest_cost_by_step) <= num_steps_for_loss_decrease:
        return False

    previous_loss = lowest_cost_by_step[-(num_steps_for_loss_decrease + 1)]

    if min_loss_decrease is None:
        min_loss_decrease = (
            previous_loss * min_percentage_loss_decrease / 100
        )

    max_new_loss = previous_loss - min_loss_decrease

    print((
        'Previous loss ({0:d} steps ago) = {1:.4e} ... minimum loss decrease '
        '= {2:.4e} ... thus, max new loss = {3:.4e} ... actual new loss = '
        '{4:.4e}'
    ).format(
        num_steps_for_loss_decrease, previous_loss, min_loss_decrease,
        max_new_loss, lowest_cost_by_step[-1]
    ))

    return lowest_cost_by_step[-1] > max_new_loss

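# A minimal worked example of the stopping rule above, with the
# error-checking and printing stripped out.  The cost history is synthetic
# and the function name is hypothetical.
def _demo_sfs_stopping():
    import numpy

    lowest_cost_by_step = numpy.array([10., 8., 7.5, 7.4, 7.35])
    num_steps_for_loss_decrease = 3
    min_loss_decrease = 1.

    # The cost three steps ago was 8.0, so continuing requires the current
    # cost to be at most 8.0 - 1.0 = 7.0.  The current cost (7.35) exceeds
    # that, so SFS stops.
    previous_loss = lowest_cost_by_step[-(num_steps_for_loss_decrease + 1)]
    max_new_loss = previous_loss - min_loss_decrease
    stopping_criterion = lowest_cost_by_step[-1] > max_new_loss
    assert stopping_criterion
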
def get_dropout_layer(dropout_fraction):
    """Creates dropout layer.

    :param dropout_fraction: Fraction of weights to drop.
    :return: layer_object: Instance of `keras.layers.Dropout`.
    """

    error_checking.assert_is_greater(dropout_fraction, 0.)
    error_checking.assert_is_less_than(dropout_fraction, 1.)

    return keras.layers.Dropout(rate=dropout_fraction)

def _check_input_args(option_dict):
    """Error-checks input arguments.

    :param option_dict: See doc for `find_convective_pixels`.
    :return: option_dict: Same as input, except that defaults might have
        been added.
    """

    if option_dict is None:
        orig_option_dict = {}
    else:
        orig_option_dict = option_dict.copy()

    option_dict = DEFAULT_OPTION_DICT.copy()
    option_dict.update(orig_option_dict)

    option_dict[PEAKEDNESS_NEIGH_KEY] = float(
        option_dict[PEAKEDNESS_NEIGH_KEY])
    option_dict[MAX_PEAKEDNESS_HEIGHT_KEY] = float(
        option_dict[MAX_PEAKEDNESS_HEIGHT_KEY])
    option_dict[MIN_HEIGHT_FRACTION_KEY] = float(
        option_dict[MIN_HEIGHT_FRACTION_KEY])
    option_dict[MIN_ECHO_TOP_KEY] = int(
        numpy.round(option_dict[MIN_ECHO_TOP_KEY]))
    option_dict[ECHO_TOP_LEVEL_KEY] = float(option_dict[ECHO_TOP_LEVEL_KEY])
    option_dict[MIN_SIZE_KEY] = int(numpy.round(option_dict[MIN_SIZE_KEY]))
    option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY] = float(
        option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY])
    option_dict[MIN_COMPOSITE_REFL_AML_KEY] = float(
        option_dict[MIN_COMPOSITE_REFL_AML_KEY])

    error_checking.assert_is_greater(option_dict[PEAKEDNESS_NEIGH_KEY], 0.)
    error_checking.assert_is_greater(
        option_dict[MAX_PEAKEDNESS_HEIGHT_KEY], 0.)
    error_checking.assert_is_greater(
        option_dict[MIN_HEIGHT_FRACTION_KEY], 0.)
    error_checking.assert_is_less_than(
        option_dict[MIN_HEIGHT_FRACTION_KEY], 1.)
    error_checking.assert_is_boolean(option_dict[HALVE_RESOLUTION_KEY])
    error_checking.assert_is_greater(option_dict[MIN_ECHO_TOP_KEY], 0)
    error_checking.assert_is_greater(option_dict[ECHO_TOP_LEVEL_KEY], 0.)
    error_checking.assert_is_greater(option_dict[MIN_SIZE_KEY], 1)
    error_checking.assert_is_greater(
        option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY], 0.)
    error_checking.assert_is_greater(
        option_dict[MIN_COMPOSITE_REFL_AML_KEY], 0.)

    if option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] is not None:
        option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] = float(
            option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY])
        error_checking.assert_is_greater(
            option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY], 0.)

    return option_dict

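# The defaults-merging pattern above (copy the default dict, then overlay
# user-supplied options) is worth seeing in isolation.  A minimal sketch with
# hypothetical keys:
def _demo_option_dict_merge():
    default_option_dict = {'min_echo_top_km': 10, 'min_size_pixels': 5}
    user_option_dict = {'min_size_pixels': 8}

    option_dict = default_option_dict.copy()
    option_dict.update(user_option_dict)

    # User-supplied values win; unspecified options fall back to defaults.
    assert option_dict == {'min_echo_top_km': 10, 'min_size_pixels': 8}
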
def create_model(
        num_classes, num_trees=DEFAULT_NUM_TREES,
        learning_rate=DEFAULT_LEARNING_RATE,
        max_depth=DEFAULT_MAX_TREE_DEPTH,
        fraction_of_examples_per_tree=DEFAULT_FRACTION_OF_EXAMPLES_PER_TREE,
        fraction_of_features_per_split=DEFAULT_FRACTION_OF_FEATURES_PER_SPLIT,
        l2_weight=DEFAULT_L2_WEIGHT):
    """Creates GBT model for classification.

    :param num_classes: Number of target classes.  If num_classes = 2, the
        model will do binary probabilistic classification.  If
        num_classes > 2, the model will do multiclass probabilistic
        classification.
    :param num_trees: Number of trees.
    :param learning_rate: Learning rate.
    :param max_depth: Maximum depth (applied to each tree).
    :param fraction_of_examples_per_tree: Fraction of examples (storm
        objects) to be used in training each tree.
    :param fraction_of_features_per_split: Fraction of features (predictor
        variables) to be used at each split point.
    :param l2_weight: L2-regularization weight.
    :return: model_object: Untrained instance of `xgboost.XGBClassifier`.
    """

    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)
    error_checking.assert_is_integer(num_trees)
    error_checking.assert_is_geq(num_trees, 10)
    error_checking.assert_is_leq(num_trees, 1000)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)
    error_checking.assert_is_integer(max_depth)
    error_checking.assert_is_geq(max_depth, 1)
    error_checking.assert_is_leq(max_depth, 10)
    error_checking.assert_is_greater(fraction_of_examples_per_tree, 0.)
    error_checking.assert_is_leq(fraction_of_examples_per_tree, 1.)
    error_checking.assert_is_greater(fraction_of_features_per_split, 0.)
    error_checking.assert_is_leq(fraction_of_features_per_split, 1.)
    error_checking.assert_is_geq(l2_weight, 0.)

    if num_classes == 2:
        return xgboost.XGBClassifier(
            max_depth=max_depth, learning_rate=learning_rate,
            n_estimators=num_trees, silent=False,
            objective='binary:logistic',
            subsample=fraction_of_examples_per_tree,
            colsample_bylevel=fraction_of_features_per_split,
            reg_lambda=l2_weight)

    return xgboost.XGBClassifier(
        max_depth=max_depth, learning_rate=learning_rate,
        n_estimators=num_trees, silent=False, objective='multi:softprob',
        subsample=fraction_of_examples_per_tree,
        colsample_bylevel=fraction_of_features_per_split,
        reg_lambda=l2_weight, num_class=num_classes)

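# A usage sketch for `create_model`, assuming xgboost is installed and the
# module-level defaults (DEFAULT_LEARNING_RATE etc.) are defined.  The data
# are synthetic; nothing here reflects a real training set.
def _demo_create_model():
    import numpy

    model_object = create_model(num_classes=2, num_trees=100)

    # 200 synthetic examples with 5 features and binary labels.
    predictor_matrix = numpy.random.random_sample(size=(200, 5))
    target_values = (predictor_matrix[:, 0] > 0.5).astype(int)

    model_object.fit(predictor_matrix, target_values)
    forecast_probabilities = model_object.predict_proba(predictor_matrix)

    # One column of class probabilities per class.
    assert forecast_probabilities.shape == (200, 2)
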
def get_dropout_layer(dropout_fraction, layer_name=None):
    """Creates dropout layer.

    :param dropout_fraction: Fraction of weights to drop.
    :param layer_name: Layer name (string).  If None, will use default name
        in Keras.
    :return: layer_object: Instance of `keras.layers.Dropout`.
    """

    error_checking.assert_is_greater(dropout_fraction, 0.)
    error_checking.assert_is_less_than(dropout_fraction, 1.)

    return keras.layers.Dropout(rate=dropout_fraction, name=layer_name)

def get_front_types(locating_var_matrix_m01_s01,
                    warm_front_percentile=DEFAULT_FRONT_PERCENTILE,
                    cold_front_percentile=DEFAULT_FRONT_PERCENTILE):
    """Infers front type at each grid cell.

    M = number of rows in grid
    N = number of columns in grid

    :param locating_var_matrix_m01_s01: M-by-N numpy array created by
        `get_locating_variable`.
    :param warm_front_percentile: Used to locate warm fronts.  For grid cell
        [i, j] to be considered part of a warm front, its locating value
        must be <= the [q]th percentile of all non-positive values in the
        grid, where q = `100 - warm_front_percentile`.
    :param cold_front_percentile: Used to locate cold fronts.  For grid cell
        [i, j] to be considered part of a cold front, its locating value
        must be >= the [q]th percentile of all non-negative values in the
        grid, where q = `cold_front_percentile`.
    :return: predicted_label_matrix: M-by-N numpy array, where the value at
        each grid cell is from the list `front_utils.VALID_INTEGER_IDS`.
    """

    error_checking.assert_is_numpy_array_without_nan(
        locating_var_matrix_m01_s01)
    error_checking.assert_is_numpy_array(
        locating_var_matrix_m01_s01, num_dimensions=2)

    error_checking.assert_is_greater(warm_front_percentile, 0.)
    error_checking.assert_is_less_than(warm_front_percentile, 100.)
    error_checking.assert_is_greater(cold_front_percentile, 0.)
    error_checking.assert_is_less_than(cold_front_percentile, 100.)

    warm_front_threshold_m01_s01 = numpy.percentile(
        locating_var_matrix_m01_s01[locating_var_matrix_m01_s01 <= 0],
        100 - warm_front_percentile)
    cold_front_threshold_m01_s01 = numpy.percentile(
        locating_var_matrix_m01_s01[locating_var_matrix_m01_s01 >= 0],
        cold_front_percentile)

    predicted_label_matrix = numpy.full(
        locating_var_matrix_m01_s01.shape,
        front_utils.NO_FRONT_INTEGER_ID, dtype=int)

    predicted_label_matrix[
        locating_var_matrix_m01_s01 <= warm_front_threshold_m01_s01
    ] = front_utils.WARM_FRONT_INTEGER_ID

    predicted_label_matrix[
        locating_var_matrix_m01_s01 >= cold_front_threshold_m01_s01
    ] = front_utils.COLD_FRONT_INTEGER_ID

    return predicted_label_matrix

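# A minimal sketch of the two-sided percentile thresholding above, on a
# synthetic locating-variable grid.  The integer IDs here (0 = no front,
# 1 = warm front, 2 = cold front) are assumptions for the demo, not the
# actual values in `front_utils`.
def _demo_front_thresholds():
    import numpy

    locating_var_matrix = numpy.array([
        [-4., -1., 0.],
        [1., 2., 5.]
    ])

    # Warm fronts: strongly negative values (bottom 3% of non-positives,
    # for a front percentile of 97).
    warm_threshold = numpy.percentile(
        locating_var_matrix[locating_var_matrix <= 0], 100 - 97.)

    # Cold fronts: strongly positive values (top 3% of non-negatives).
    cold_threshold = numpy.percentile(
        locating_var_matrix[locating_var_matrix >= 0], 97.)

    label_matrix = numpy.full(locating_var_matrix.shape, 0, dtype=int)
    label_matrix[locating_var_matrix <= warm_threshold] = 1
    label_matrix[locating_var_matrix >= cold_threshold] = 2
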
def _create_paneled_figure(num_panel_rows, num_panel_columns,
                           horizontal_space_fraction=0.1,
                           vertical_space_fraction=0.1):
    """Creates paneled figure.

    :param num_panel_rows: Number of rows.
    :param num_panel_columns: Number of columns.
    :param horizontal_space_fraction: Horizontal space between adjacent
        panels (as fraction of panel size).
    :param vertical_space_fraction: Vertical space between adjacent panels
        (as fraction of panel size).
    :return: figure_object: Instance of `matplotlib.figure.Figure`.
    :return: axes_objects_2d_list: 2-D list, where
        axes_objects_2d_list[i][j] is the handle (instance of
        `matplotlib.axes._subplots.AxesSubplot`) for the [i]th row and [j]th
        column.
    """

    error_checking.assert_is_integer(num_panel_rows)
    error_checking.assert_is_geq(num_panel_rows, 1)
    error_checking.assert_is_integer(num_panel_columns)
    error_checking.assert_is_geq(num_panel_columns, 1)
    error_checking.assert_is_geq(horizontal_space_fraction, 0.)
    error_checking.assert_is_less_than(horizontal_space_fraction, 1.)
    error_checking.assert_is_geq(vertical_space_fraction, 0.)
    error_checking.assert_is_less_than(vertical_space_fraction, 1.)

    figure_object, axes_objects_2d_list = pyplot.subplots(
        num_panel_rows, num_panel_columns, sharex=True, sharey=True,
        figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    # `pyplot.subplots` returns a scalar, 1-D array, or 2-D array of axes
    # handles, depending on the grid shape.  Normalize to a 2-D list so that
    # callers can always index as [row][column].
    if num_panel_rows == num_panel_columns == 1:
        axes_objects_2d_list = [[axes_objects_2d_list]]
    elif num_panel_columns == 1:
        axes_objects_2d_list = [[a] for a in axes_objects_2d_list]
    elif num_panel_rows == 1:
        axes_objects_2d_list = [axes_objects_2d_list]

    pyplot.subplots_adjust(
        left=0.02, bottom=0.02, right=0.98, top=0.95,
        hspace=vertical_space_fraction, wspace=horizontal_space_fraction)

    return figure_object, axes_objects_2d_list

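# A usage sketch for `_create_paneled_figure`, assuming the module-level
# constants FIGURE_WIDTH_INCHES and FIGURE_HEIGHT_INCHES are defined.  The
# 2-D-list normalization means callers can always index axes as [row][col],
# even for 1-row, 1-column, or single-panel figures.
def _demo_paneled_figure():
    figure_object, axes_objects_2d_list = _create_paneled_figure(
        num_panel_rows=2, num_panel_columns=3)

    for i in range(2):
        for j in range(3):
            axes_objects_2d_list[i][j].set_title(
                'Row {0:d}, column {1:d}'.format(i, j))
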
def _check_input_args(num_iterations, learning_rate, ideal_activation=None):
    """Error-checks input args for backwards optimization.

    :param num_iterations: See doc for `_do_gradient_descent`.
    :param learning_rate: Same.
    :param ideal_activation: See doc for
        `optimize_input_for_neuron_activation` or
        `optimize_input_for_channel_activation`.
    """

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)

    if ideal_activation is not None:
        error_checking.assert_is_greater(ideal_activation, 0.)

def optimize_input_for_class(model_object, target_class,
                             init_function_or_matrices,
                             num_iterations=DEFAULT_NUM_ITERATIONS,
                             learning_rate=DEFAULT_LEARNING_RATE):
    """Creates synthetic input example to maximize prob of target class.

    :param model_object: Trained instance of `keras.models.Model` or
        `keras.models.Sequential`.
    :param target_class: Input data will be optimized for this class.  Must
        be an integer in 0...(K - 1), where K = number of classes.
    :param init_function_or_matrices: See doc for `_do_gradient_descent`.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :return: list_of_optimized_matrices: Same.
    """

    model_interpretation.check_component_metadata(
        component_type_string=model_interpretation.CLASS_COMPONENT_TYPE_STRING,
        target_class=target_class)

    _check_input_args(num_iterations=num_iterations,
                      learning_rate=learning_rate)

    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    if num_output_neurons == 1:
        error_checking.assert_is_leq(target_class, 1)

        if target_class == 1:
            loss_tensor = K.mean(
                (model_object.layers[-1].output[..., 0] - 1) ** 2)
        else:
            loss_tensor = K.mean(model_object.layers[-1].output[..., 0] ** 2)
    else:
        error_checking.assert_is_less_than(target_class, num_output_neurons)
        loss_tensor = K.mean(
            (model_object.layers[-1].output[..., target_class] - 1) ** 2)

    return _do_gradient_descent(
        model_object=model_object, loss_tensor=loss_tensor,
        init_function_or_matrices=init_function_or_matrices,
        num_iterations=num_iterations, learning_rate=learning_rate)

def _check_decision_tree_hyperparams(
        num_trees, loss_function_string, num_features_total,
        num_features_per_split, max_depth, min_examples_per_split,
        min_examples_per_leaf, learning_rate=None,
        subsampling_fraction=None):
    """Checks decision-tree hyperparameters (input parameters) for errors.

    :param num_trees: Number of trees in ensemble.
    :param loss_function_string: Loss function.  This method ensures only
        that the loss function is a string.  The specific learning method
        will determine whether or not the string is valid.
    :param num_features_total: Number of features in training data.
    :param num_features_per_split: Number of features to investigate at each
        split point (branch node).
    :param max_depth: Max depth of any tree in ensemble.
    :param min_examples_per_split: Minimum number of examples (storm
        objects) at a split point (branch node).
    :param min_examples_per_leaf: Minimum number of examples (storm objects)
        at a leaf node.
    :param learning_rate: [for gradient-boosting only] Learning rate (used
        to decrease the contribution of each successive tree).
    :param subsampling_fraction: [for gradient-boosting only] Fraction of
        examples to use in training each tree.
    """

    error_checking.assert_is_integer(num_trees)
    error_checking.assert_is_geq(num_trees, 2)
    error_checking.assert_is_string(loss_function_string)

    error_checking.assert_is_integer(num_features_per_split)
    error_checking.assert_is_greater(num_features_per_split, 0)
    error_checking.assert_is_leq(num_features_per_split, num_features_total)

    if max_depth is not None:
        error_checking.assert_is_integer(max_depth)
        error_checking.assert_is_greater(max_depth, 0)

    error_checking.assert_is_integer(min_examples_per_split)
    error_checking.assert_is_greater(min_examples_per_split, 1)
    error_checking.assert_is_integer(min_examples_per_leaf)
    error_checking.assert_is_greater(min_examples_per_leaf, 0)

    if learning_rate is not None:
        error_checking.assert_is_greater(learning_rate, 0.)
        error_checking.assert_is_less_than(learning_rate, 1.)

    if subsampling_fraction is not None:
        error_checking.assert_is_greater(subsampling_fraction, 0.)
        error_checking.assert_is_leq(subsampling_fraction, 1.)

def find_sig_grid_points(p_value_matrix, max_false_discovery_rate):
    """Finds grid points with statistically significant values.

    This method implements Equation 3 of Wilks et al. (2016), which you can
    find here:
    https://journals.ametsoc.org/doi/full/10.1175/BAMS-D-15-00267.1

    :param p_value_matrix: numpy array of p-values.
    :param max_false_discovery_rate: Max false-discovery rate.
    :return: significance_matrix: numpy array of Boolean flags, with same
        shape as `p_value_matrix`.
    """

    error_checking.assert_is_greater(max_false_discovery_rate, 0.)
    error_checking.assert_is_less_than(max_false_discovery_rate, 1.)

    p_values_sorted = numpy.ravel(p_value_matrix)
    p_values_sorted = p_values_sorted[
        numpy.invert(numpy.isnan(p_values_sorted))
    ]
    p_values_sorted = numpy.sort(p_values_sorted)

    num_grid_cells = len(p_values_sorted)
    grid_cell_indices = numpy.linspace(
        1, num_grid_cells, num=num_grid_cells, dtype=float)

    significant_flags = (
        p_values_sorted <=
        (grid_cell_indices / num_grid_cells) * max_false_discovery_rate
    )
    significant_indices = numpy.where(significant_flags)[0]

    if len(significant_indices) == 0:
        max_p_value = 0.
    else:
        max_p_value = p_values_sorted[significant_indices[-1]]

    print('Max p-value for Wilks test = {0:.2e}'.format(max_p_value))
    significance_matrix = p_value_matrix <= max_p_value

    print('Number of significant grid points = {0:d}/{1:d}'.format(
        numpy.sum(significance_matrix),
        numpy.sum(numpy.invert(numpy.isnan(p_value_matrix)))
    ))

    return significance_matrix

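# A self-contained sketch of the false-discovery-rate rule above (it is the
# Benjamini-Hochberg procedure applied to grid points), using synthetic
# p-values.  The function name is hypothetical.
def _demo_fdr_control():
    import numpy

    p_values_sorted = numpy.sort(
        numpy.array([0.001, 0.004, 0.03, 0.2, 0.6])
    )
    max_false_discovery_rate = 0.05
    num_cells = len(p_values_sorted)
    ranks = numpy.linspace(1, num_cells, num=num_cells, dtype=float)

    # A sorted p-value passes if p_(k) <= (k / K) * FDR.  Here the per-rank
    # thresholds are [0.01, 0.02, 0.03, 0.04, 0.05], so the first three
    # p-values pass and the last two fail.
    passing_flags = (
        p_values_sorted <= (ranks / num_cells) * max_false_discovery_rate
    )

    # The significance threshold is the largest passing p-value; everything
    # at or below it is declared significant.
    max_p_value = p_values_sorted[numpy.where(passing_flags)[0][-1]]
    assert max_p_value == 0.03
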
def rotate_displacement_vectors(x_displacements_metres,
                                y_displacements_metres,
                                ccw_rotation_angle_deg):
    """Rotates each displacement vector by a certain angle.

    :param x_displacements_metres: numpy array of eastward displacements.
    :param y_displacements_metres: equivalent-size numpy array of northward
        displacements.
    :param ccw_rotation_angle_deg: Rotation angle (degrees).  Each
        displacement vector will be rotated counterclockwise by this amount.
    :return: x_prime_displacements_metres: equivalent-size numpy array of
        "eastward" displacements (in the rotated coordinate system).
    :return: y_prime_displacements_metres: equivalent-size numpy array of
        "northward" displacements (in the rotated coordinate system).
    """

    error_checking.assert_is_numpy_array_without_nan(x_displacements_metres)
    error_checking.assert_is_numpy_array_without_nan(y_displacements_metres)

    # Check the y-array against the shape of the x-array (the original
    # checked the y-array against its own shape, which is a no-op).
    error_checking.assert_is_numpy_array(
        y_displacements_metres,
        exact_dimensions=numpy.array(
            x_displacements_metres.shape, dtype=int))

    error_checking.assert_is_greater(ccw_rotation_angle_deg, -360.)
    error_checking.assert_is_less_than(ccw_rotation_angle_deg, 360.)

    ccw_rotation_angle_rad = DEGREES_TO_RADIANS * ccw_rotation_angle_deg
    rotation_matrix = numpy.array([
        [numpy.cos(ccw_rotation_angle_rad),
         -numpy.sin(ccw_rotation_angle_rad)],
        [numpy.sin(ccw_rotation_angle_rad),
         numpy.cos(ccw_rotation_angle_rad)]
    ])

    x_prime_displacements_metres = numpy.full(
        x_displacements_metres.shape, numpy.nan)
    y_prime_displacements_metres = numpy.full(
        x_displacements_metres.shape, numpy.nan)
    num_points = x_prime_displacements_metres.size

    for i in range(num_points):
        this_vector = numpy.transpose(numpy.array([
            x_displacements_metres.flat[i], y_displacements_metres.flat[i]
        ]))

        this_vector = numpy.matmul(rotation_matrix, this_vector)
        x_prime_displacements_metres.flat[i] = this_vector[0]
        y_prime_displacements_metres.flat[i] = this_vector[1]

    return x_prime_displacements_metres, y_prime_displacements_metres

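# A quick check of the rotation convention above: rotating the unit vector
# (1, 0) by 90 deg counterclockwise should yield (0, 1).  This also shows
# that the per-point loop can be replaced by two vectorized expressions,
# since every point is multiplied by the same 2-by-2 rotation matrix.
def _demo_rotation():
    import numpy

    angle_rad = numpy.deg2rad(90.)
    x_displacements = numpy.array([1., 0.])
    y_displacements = numpy.array([0., 1.])

    # Vectorized equivalent of the matrix-vector products in the loop.
    x_prime = (
        numpy.cos(angle_rad) * x_displacements -
        numpy.sin(angle_rad) * y_displacements
    )
    y_prime = (
        numpy.sin(angle_rad) * x_displacements +
        numpy.cos(angle_rad) * y_displacements
    )

    assert numpy.allclose(x_prime, numpy.array([0., -1.]))
    assert numpy.allclose(y_prime, numpy.array([1., 0.]))
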
def check_input_args(input_matrix, max_percentile_level,
                     threshold_var_index, threshold_value,
                     threshold_type_string):
    """Error-checks input arguments.

    :param input_matrix: See doc for `run_pmm_many_variables`.
    :param max_percentile_level: Same.
    :param threshold_var_index: Same.
    :param threshold_value: Same.
    :param threshold_type_string: Same.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['max_percentile_level']: See input doc.
    metadata_dict['threshold_var_index']: See input doc.
    metadata_dict['threshold_value']: See input doc.
    metadata_dict['threshold_type_string']: See input doc.
    """

    error_checking.assert_is_numpy_array_without_nan(input_matrix)
    num_spatial_dimensions = len(input_matrix.shape) - 2
    error_checking.assert_is_geq(num_spatial_dimensions, 1)

    error_checking.assert_is_greater(max_percentile_level, 50.)
    error_checking.assert_is_leq(max_percentile_level, 100.)

    use_threshold = not (
        threshold_var_index is None and threshold_value is None
        and threshold_type_string is None
    )

    if use_threshold:
        _check_threshold_type(threshold_type_string)
        error_checking.assert_is_not_nan(threshold_value)

        error_checking.assert_is_integer(threshold_var_index)
        error_checking.assert_is_geq(threshold_var_index, 0)

        num_variables = input_matrix.shape[-1]
        error_checking.assert_is_less_than(
            threshold_var_index, num_variables)
    else:
        threshold_var_index = -1

    return {
        MAX_PERCENTILE_KEY: max_percentile_level,
        THRESHOLD_VAR_KEY: threshold_var_index,
        THRESHOLD_VALUE_KEY: threshold_value,
        THRESHOLD_TYPE_KEY: threshold_type_string
    }

def get_class_activation_for_examples(model_object, target_class,
                                      list_of_input_matrices):
    """For each input example, returns predicted probability of target class.

    :param model_object: Instance of `keras.models.Model`.
    :param target_class: Predictions will be returned for this class.  Must
        be an integer in 0...(K - 1), where K = number of classes.
    :param list_of_input_matrices: length-T list of numpy arrays, comprising
        one or more examples (storm objects).  list_of_input_matrices[i]
        must have the same dimensions as the [i]th input tensor to the
        model.
    :return: activation_values: length-E numpy array, where
        activation_values[i] is the activation (predicted probability or
        logit) of the target class for the [i]th example.
    """

    check_metadata(
        component_type_string=model_interpretation.CLASS_COMPONENT_TYPE_STRING,
        target_class=target_class)

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    if num_output_neurons == 1:
        error_checking.assert_is_leq(target_class, 1)

        if target_class == 1:
            output_tensor = model_object.layers[-1].output[..., 0]
        else:
            output_tensor = 1. - model_object.layers[-1].output[..., 0]
    else:
        error_checking.assert_is_less_than(target_class, num_output_neurons)
        output_tensor = model_object.layers[-1].output[..., target_class]

    activation_function = K.function(
        list_of_input_tensors + [K.learning_phase()], [output_tensor])

    return activation_function(list_of_input_matrices + [0])[0]

def run_a_star(grid_search_object, start_row, start_column, end_row,
               end_column):
    """Runs A-star search.

    If A-star cannot reach the end node, this method returns None for all
    outputs.

    N = number of nodes in final path

    :param grid_search_object: Instance of `GridSearch`.
    :param start_row: Row index of start node.
    :param start_column: Column index of start node.
    :param end_row: Row index of end node.
    :param end_column: Column index of end node.
    :return: visited_rows: length-N numpy array with row indices of nodes in
        final path.
    :return: visited_columns: length-N numpy array with column indices of
        nodes in final path.
    """

    error_checking.assert_is_integer(start_row)
    error_checking.assert_is_geq(start_row, 0)
    error_checking.assert_is_less_than(
        start_row, getattr(grid_search_object, NUM_GRID_ROWS_KEY))

    error_checking.assert_is_integer(end_row)
    error_checking.assert_is_geq(end_row, 0)
    error_checking.assert_is_less_than(
        end_row, getattr(grid_search_object, NUM_GRID_ROWS_KEY))

    error_checking.assert_is_integer(start_column)
    error_checking.assert_is_geq(start_column, 0)
    error_checking.assert_is_less_than(
        start_column, getattr(grid_search_object, NUM_GRID_COLUMNS_KEY))

    error_checking.assert_is_integer(end_column)
    error_checking.assert_is_geq(end_column, 0)
    error_checking.assert_is_less_than(
        end_column, getattr(grid_search_object, NUM_GRID_COLUMNS_KEY))

    # The underlying search object takes and returns nodes in
    # (x, y) = (column, row) order, hence the swapped tuples here.
    visited_rowcol_tuples = grid_search_object.astar(
        (start_column, start_row), (end_column, end_row))

    if visited_rowcol_tuples is None:
        return None, None

    visited_rowcol_tuples = list(visited_rowcol_tuples)
    visited_rows = numpy.array(
        [x[1] for x in visited_rowcol_tuples], dtype=int)
    visited_columns = numpy.array(
        [x[0] for x in visited_rowcol_tuples], dtype=int)

    return visited_rows, visited_columns

def pad_closed_polygon(polygon_object, num_padding_vertices=0,
                       check_input_args=True):
    """Pads closed polygon (by adding duplicate vertices at either end).

    V_p = number of vertices after padding

    :param polygon_object: Instance of `shapely.geometry.Polygon`.
    :param num_padding_vertices: Number of duplicate vertices to add at
        either end.
    :param check_input_args: Boolean flag.  If True, will error-check input
        arguments.  If False, will not.
    :return: vertex_x_coords_padded: numpy array (length V_p) with
        x-coordinates of vertices.
    :return: vertex_y_coords_padded: numpy array (length V_p) with
        y-coordinates of vertices.
    """

    # Drop the last vertex, which duplicates the first in a closed polygon.
    vertex_x_coords = numpy.asarray(polygon_object.exterior.xy[0])[:-1]
    vertex_y_coords = numpy.asarray(polygon_object.exterior.xy[1])[:-1]
    num_vertices = len(vertex_x_coords)

    if check_input_args:
        error_checking.assert_is_geq(
            num_vertices, MIN_VERTICES_IN_POLYGON_OR_LINE)
        error_checking.assert_is_integer(num_padding_vertices)
        error_checking.assert_is_greater(num_padding_vertices, 0)
        error_checking.assert_is_less_than(
            num_padding_vertices, num_vertices)

    vertex_x_coords_start = vertex_x_coords[-num_padding_vertices:]
    vertex_y_coords_start = vertex_y_coords[-num_padding_vertices:]
    vertex_x_coords_end = vertex_x_coords[:num_padding_vertices]
    vertex_y_coords_end = vertex_y_coords[:num_padding_vertices]

    vertex_x_coords_padded = numpy.concatenate(
        (vertex_x_coords_start, vertex_x_coords, vertex_x_coords_end))
    vertex_y_coords_padded = numpy.concatenate(
        (vertex_y_coords_start, vertex_y_coords, vertex_y_coords_end))

    return vertex_x_coords_padded, vertex_y_coords_padded

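# A usage sketch for `pad_closed_polygon`, assuming shapely is installed.
# For a closed polygon the padding wraps around: vertices from the end of
# the ring are prepended, and vertices from the start are appended.
def _demo_pad_closed_polygon():
    import shapely.geometry

    # Hexagon with 6 unique vertices.
    polygon_object = shapely.geometry.Polygon(
        [(0., 0.), (2., 0.), (3., 1.), (2., 2.), (0., 2.), (-1., 1.)]
    )

    x_coords_padded, y_coords_padded = pad_closed_polygon(
        polygon_object, num_padding_vertices=2)

    # 6 unique vertices + 2 padding vertices at each end = 10 total.
    assert len(x_coords_padded) == 10
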
def _get_grid_point_coords(model_name, first_row_in_full_grid,
                           last_row_in_full_grid, first_column_in_full_grid,
                           last_column_in_full_grid, grid_id=None,
                           basemap_object=None):
    """Returns x-y and lat-long coords for a subgrid of the full model grid.

    This method generates different x-y coordinates than
    `nwp_model_utils.get_xy_grid_point_matrices`, because (like
    `mpl_toolkits.basemap.Basemap`) this method sets
    false easting = false northing = 0 metres.

    :param model_name: Name of NWP model (must be accepted by
        `nwp_model_utils.check_grid_name`).
    :param first_row_in_full_grid: Row 0 in the subgrid is row
        `first_row_in_full_grid` in the full grid.
    :param last_row_in_full_grid: Last row in the subgrid is row
        `last_row_in_full_grid` in the full grid.  If you want the last row
        in the subgrid to equal the last row in the full grid, make this -1.
    :param first_column_in_full_grid: Column 0 in the subgrid is column
        `first_column_in_full_grid` in the full grid.
    :param last_column_in_full_grid: Last column in the subgrid is column
        `last_column_in_full_grid` in the full grid.  If you want the last
        column in the subgrid to equal the last column in the full grid,
        make this -1.
    :param grid_id: Grid for NWP model (must be accepted by
        `nwp_model_utils.check_grid_name`).
    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap` for
        the given NWP model.  If you don't have one, no big deal -- leave
        this argument empty.
    :return: coordinate_dict: Dictionary with the following keys.
    coordinate_dict['grid_point_x_matrix_metres']: M-by-N numpy array of
        x-coordinates.
    coordinate_dict['grid_point_y_matrix_metres']: M-by-N numpy array of
        y-coordinates.
    coordinate_dict['grid_point_lat_matrix_deg']: M-by-N numpy array of
        latitudes (deg N).
    coordinate_dict['grid_point_lng_matrix_deg']: M-by-N numpy array of
        longitudes (deg E).
    """

    num_rows_in_full_grid, num_columns_in_full_grid = (
        nwp_model_utils.get_grid_dimensions(
            model_name=model_name, grid_name=grid_id)
    )

    error_checking.assert_is_integer(first_row_in_full_grid)
    error_checking.assert_is_geq(first_row_in_full_grid, 0)
    error_checking.assert_is_integer(last_row_in_full_grid)

    if last_row_in_full_grid < 0:
        last_row_in_full_grid += num_rows_in_full_grid

    error_checking.assert_is_greater(
        last_row_in_full_grid, first_row_in_full_grid)
    error_checking.assert_is_less_than(
        last_row_in_full_grid, num_rows_in_full_grid)

    error_checking.assert_is_integer(first_column_in_full_grid)
    error_checking.assert_is_geq(first_column_in_full_grid, 0)
    error_checking.assert_is_integer(last_column_in_full_grid)

    if last_column_in_full_grid < 0:
        last_column_in_full_grid += num_columns_in_full_grid

    error_checking.assert_is_greater(
        last_column_in_full_grid, first_column_in_full_grid)
    error_checking.assert_is_less_than(
        last_column_in_full_grid, num_columns_in_full_grid)

    grid_point_lat_matrix_deg, grid_point_lng_matrix_deg = (
        nwp_model_utils.get_latlng_grid_point_matrices(
            model_name=model_name, grid_name=grid_id)
    )

    grid_point_lat_matrix_deg = grid_point_lat_matrix_deg[
        first_row_in_full_grid:(last_row_in_full_grid + 1),
        first_column_in_full_grid:(last_column_in_full_grid + 1)
    ]
    grid_point_lng_matrix_deg = grid_point_lng_matrix_deg[
        first_row_in_full_grid:(last_row_in_full_grid + 1),
        first_column_in_full_grid:(last_column_in_full_grid + 1)
    ]

    if basemap_object is None:
        standard_latitudes_deg, central_longitude_deg = (
            nwp_model_utils.get_projection_params(model_name)
        )

        projection_object = projections.init_lcc_projection(
            standard_latitudes_deg=standard_latitudes_deg,
            central_longitude_deg=central_longitude_deg)

        grid_point_x_matrix_metres, grid_point_y_matrix_metres = (
            projections.project_latlng_to_xy(
                latitudes_deg=grid_point_lat_matrix_deg,
                longitudes_deg=grid_point_lng_matrix_deg,
                projection_object=projection_object,
                false_northing_metres=0., false_easting_metres=0.)
        )
    else:
        grid_point_x_matrix_metres, grid_point_y_matrix_metres = (
            basemap_object(
                grid_point_lng_matrix_deg, grid_point_lat_matrix_deg)
        )

    return {
        X_COORD_MATRIX_KEY: grid_point_x_matrix_metres,
        Y_COORD_MATRIX_KEY: grid_point_y_matrix_metres,
        LATITUDE_MATRIX_KEY: grid_point_lat_matrix_deg,
        LONGITUDE_MATRIX_KEY: grid_point_lng_matrix_deg,
    }

def plot_2d_grid_with_contours(saliency_matrix_2d, axes_object,
                               colour_map_object,
                               max_absolute_contour_level, contour_interval,
                               line_width=DEFAULT_CONTOUR_WIDTH):
    """Plots 2-D saliency map with line contours.

    M = number of rows in spatial grid
    N = number of columns in spatial grid

    :param saliency_matrix_2d: M-by-N numpy array of saliency values.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
        Will plot on these axes.
    :param colour_map_object: Colour scheme (instance of
        `matplotlib.pyplot.cm`).
    :param max_absolute_contour_level: Max absolute value to plot.  Minimum
        value will be `-1 * max_absolute_contour_level`.
    :param contour_interval: Interval (in saliency units) between successive
        contours.
    :param line_width: Width of contour lines.
    """

    error_checking.assert_is_geq(max_absolute_contour_level, 0.)
    max_absolute_contour_level = max([max_absolute_contour_level, 0.001])

    error_checking.assert_is_geq(contour_interval, 0.)
    contour_interval = max([contour_interval, 0.0001])

    error_checking.assert_is_numpy_array_without_nan(saliency_matrix_2d)
    error_checking.assert_is_numpy_array(
        saliency_matrix_2d, num_dimensions=2)
    error_checking.assert_is_less_than(
        contour_interval, max_absolute_contour_level)

    num_grid_rows = saliency_matrix_2d.shape[0]
    num_grid_columns = saliency_matrix_2d.shape[1]

    x_coords_unique = numpy.linspace(
        0, num_grid_columns, num=num_grid_columns + 1, dtype=float)
    x_coords_unique = x_coords_unique[:-1]
    x_coords_unique = x_coords_unique + numpy.diff(x_coords_unique[:2]) / 2

    y_coords_unique = numpy.linspace(
        0, num_grid_rows, num=num_grid_rows + 1, dtype=float)
    y_coords_unique = y_coords_unique[:-1]
    y_coords_unique = y_coords_unique + numpy.diff(y_coords_unique[:2]) / 2

    x_coord_matrix, y_coord_matrix = numpy.meshgrid(
        x_coords_unique, y_coords_unique)

    half_num_contours = int(
        numpy.round(1 + max_absolute_contour_level / contour_interval)
    )

    # Plot positive values.
    these_contour_levels = numpy.linspace(
        0., max_absolute_contour_level, num=half_num_contours)

    axes_object.contour(
        x_coord_matrix, y_coord_matrix, saliency_matrix_2d,
        these_contour_levels, cmap=colour_map_object,
        vmin=numpy.min(these_contour_levels),
        vmax=numpy.max(these_contour_levels), linewidths=line_width,
        linestyles='solid', zorder=1e6)

    # Plot negative values.
    these_contour_levels = these_contour_levels[1:]

    axes_object.contour(
        x_coord_matrix, y_coord_matrix, -saliency_matrix_2d,
        these_contour_levels, cmap=colour_map_object,
        vmin=numpy.min(these_contour_levels),
        vmax=numpy.max(these_contour_levels), linewidths=line_width,
        linestyles='dashed', zorder=1e6)

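# A usage sketch for `plot_2d_grid_with_contours`, with a synthetic saliency
# field.  It assumes the module-level constant DEFAULT_CONTOUR_WIDTH is
# defined.  Positive saliency is drawn with solid contours and negative
# saliency with dashed contours of the same magnitudes.
def _demo_saliency_contours():
    import numpy
    from matplotlib import pyplot

    x_matrix, y_matrix = numpy.meshgrid(
        numpy.linspace(-3., 3., num=32), numpy.linspace(-3., 3., num=32)
    )
    saliency_matrix_2d = x_matrix * numpy.exp(
        -x_matrix ** 2 - y_matrix ** 2)

    _, axes_object = pyplot.subplots(1, 1)
    plot_2d_grid_with_contours(
        saliency_matrix_2d=saliency_matrix_2d, axes_object=axes_object,
        colour_map_object=pyplot.cm.Greys,
        max_absolute_contour_level=0.4, contour_interval=0.05)
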
def pad_polyline(vertex_x_coords, vertex_y_coords, num_padding_vertices=0,
                 check_input_args=True):
    """Pads polyline* by adding extrapolated vertices at either end.

    * as opposed to closed polygon

    V_u = number of unique vertices
    V_p = number of vertices after padding

    :param vertex_x_coords: numpy array (length V_u) with x-coordinates of
        vertices.
    :param vertex_y_coords: numpy array (length V_u) with y-coordinates of
        vertices.
    :param num_padding_vertices: Number of extrapolated vertices to add at
        either end.
    :param check_input_args: Boolean flag.  If True, will error-check input
        arguments.  If False, will not.
    :return: vertex_x_coords_padded: numpy array (length V_p) with
        x-coordinates of vertices.
    :return: vertex_y_coords_padded: numpy array (length V_p) with
        y-coordinates of vertices.
    """

    num_vertices = vertex_x_coords.size

    if check_input_args:
        error_checking.assert_is_geq(
            num_vertices, MIN_VERTICES_IN_POLYGON_OR_LINE)
        error_checking.assert_is_numpy_array_without_nan(vertex_x_coords)
        error_checking.assert_is_numpy_array(
            vertex_x_coords, num_dimensions=1)
        error_checking.assert_is_numpy_array_without_nan(vertex_y_coords)
        error_checking.assert_is_numpy_array(
            vertex_y_coords, exact_dimensions=numpy.array([num_vertices]))
        error_checking.assert_is_integer(num_padding_vertices)
        error_checking.assert_is_greater(num_padding_vertices, 0)
        error_checking.assert_is_less_than(
            num_padding_vertices, num_vertices)

    x_difference = vertex_x_coords[1] - vertex_x_coords[0]
    vertex_x_coords_start = numpy.linspace(
        vertex_x_coords[0] - num_padding_vertices * x_difference,
        vertex_x_coords[0] - x_difference, num=num_padding_vertices)

    y_difference = vertex_y_coords[1] - vertex_y_coords[0]
    vertex_y_coords_start = numpy.linspace(
        vertex_y_coords[0] - num_padding_vertices * y_difference,
        vertex_y_coords[0] - y_difference, num=num_padding_vertices)

    x_difference = vertex_x_coords[-1] - vertex_x_coords[-2]
    vertex_x_coords_end = numpy.linspace(
        vertex_x_coords[-1] + x_difference,
        vertex_x_coords[-1] + num_padding_vertices * x_difference,
        num=num_padding_vertices)

    y_difference = vertex_y_coords[-1] - vertex_y_coords[-2]
    vertex_y_coords_end = numpy.linspace(
        vertex_y_coords[-1] + y_difference,
        vertex_y_coords[-1] + num_padding_vertices * y_difference,
        num=num_padding_vertices)

    vertex_x_coords_padded = numpy.concatenate(
        (vertex_x_coords_start, vertex_x_coords, vertex_x_coords_end))
    vertex_y_coords_padded = numpy.concatenate(
        (vertex_y_coords_start, vertex_y_coords, vertex_y_coords_end))

    return vertex_x_coords_padded, vertex_y_coords_padded

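# A quick check of the extrapolation above: padding a straight line should
# simply extend it with the same spacing.  Error-checking is skipped so the
# demo does not depend on the module constant
# MIN_VERTICES_IN_POLYGON_OR_LINE.
def _demo_pad_polyline():
    import numpy

    vertex_x_coords = numpy.array([0., 1., 2.])
    vertex_y_coords = numpy.array([0., 2., 4.])

    x_padded, y_padded = pad_polyline(
        vertex_x_coords, vertex_y_coords, num_padding_vertices=2,
        check_input_args=False)

    assert numpy.allclose(
        x_padded, numpy.array([-2., -1., 0., 1., 2., 3., 4.]))
    assert numpy.allclose(
        y_padded, numpy.array([-4., -2., 0., 2., 4., 6., 8.]))
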
def _get_error_matrix(cost_matrix, is_cost_auc, confidence_level,
                      backwards_flag, multipass_flag):
    """Creates error matrix (used to plot error bars).

    S = number of steps in permutation test
    B = number of bootstrap replicates

    :param cost_matrix: S-by-B numpy array of costs.
    :param is_cost_auc: Boolean flag.  If True, cost function is AUC (area
        under receiver-operating-characteristic curve).
    :param confidence_level: Confidence level (in range 0...1).
    :param backwards_flag: Boolean flag, indicating whether the test is
        forward or backwards.
    :param multipass_flag: Boolean flag, indicating whether the test is
        single-pass or multi-pass.
    :return: error_matrix: 2-by-S numpy array, where the first row contains
        negative errors and the second row contains positive errors.
    :return: significant_flags: length-S numpy array of Boolean flags.  If
        significant_flags[i] = True, the [i]th step has a significantly
        different cost than the [i + 1]th step.
    """

    num_steps = cost_matrix.shape[0]
    significant_flags = numpy.full(num_steps, False, dtype=bool)

    for i in range(num_steps - 1):
        if backwards_flag:
            these_diffs = cost_matrix[i + 1, :] - cost_matrix[i, :]
        else:
            these_diffs = cost_matrix[i, :] - cost_matrix[i + 1, :]

        if not is_cost_auc:
            these_diffs *= -1

        print(numpy.mean(these_diffs))
        this_percentile = percentileofscore(
            a=these_diffs, score=0., kind='mean')

        if multipass_flag:
            significant_flags[i] = this_percentile <= 5.
        else:
            significant_flags[i + 1] = this_percentile <= 5.

        print((
            'Percentile of 0 in (cost at step {0:d}) - (cost at step {1:d}) '
            '= {2:.4f}'
        ).format(i + 1, i, this_percentile))

        print(significant_flags)
        print('\n')

    error_checking.assert_is_geq(confidence_level, 0.9)
    error_checking.assert_is_less_than(confidence_level, 1.)

    mean_costs = numpy.mean(cost_matrix, axis=-1)
    min_costs = numpy.percentile(
        cost_matrix, 50 * (1. - confidence_level), axis=-1)
    max_costs = numpy.percentile(
        cost_matrix, 50 * (1. + confidence_level), axis=-1)

    negative_errors = mean_costs - min_costs
    positive_errors = max_costs - mean_costs

    negative_errors = numpy.reshape(
        negative_errors, (1, negative_errors.size))
    positive_errors = numpy.reshape(
        positive_errors, (1, positive_errors.size))
    error_matrix = numpy.vstack((negative_errors, positive_errors))

    return error_matrix, significant_flags

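# A minimal sketch of the percentile-based error bars above, for one step
# with synthetic bootstrap replicates.  At 95% confidence the bars span the
# 2.5th to 97.5th percentiles of the bootstrap distribution, hence the
# `50 * (1 +/- confidence_level)` expressions.
def _demo_bootstrap_error_bars():
    import numpy

    costs_one_step = numpy.array([0.50, 0.52, 0.48, 0.55, 0.47])
    confidence_level = 0.95

    mean_cost = numpy.mean(costs_one_step)
    min_cost = numpy.percentile(
        costs_one_step, 50 * (1. - confidence_level))
    max_cost = numpy.percentile(
        costs_one_step, 50 * (1. + confidence_level))

    negative_error = mean_cost - min_cost
    positive_error = max_cost - mean_cost
    assert negative_error > 0 and positive_error > 0
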
def _run(input_gradcam_file_name, percentile_threshold, min_class_activation,
         output_file_name):
    """Thresholds Grad-CAM output to create regions of interest (polygons).

    This is effectively the main method.

    :param input_gradcam_file_name: See documentation at top of file.
    :param percentile_threshold: Same.
    :param min_class_activation: Same.
    :param output_file_name: Same.
    :raises: TypeError: if any class-activation map does not contain exactly
        2 spatial dimensions.
    """

    error_checking.assert_is_geq(percentile_threshold, 50.)
    error_checking.assert_is_less_than(percentile_threshold, 100.)
    error_checking.assert_is_greater(min_class_activation, 0.)

    print('Reading data from: "{0:s}"...\n'.format(input_gradcam_file_name))
    pmm_flag = False

    try:
        gradcam_dict = gradcam.read_standard_file(input_gradcam_file_name)
        list_of_cam_matrices = gradcam_dict.pop(gradcam.CAM_MATRICES_KEY)
    except ValueError:
        gradcam_dict = gradcam.read_pmm_file(input_gradcam_file_name)
        list_of_cam_matrices = gradcam_dict.pop(
            gradcam.MEAN_CAM_MATRICES_KEY)

        for j in range(len(list_of_cam_matrices)):
            if list_of_cam_matrices[j] is None:
                continue

            list_of_cam_matrices[j] = numpy.expand_dims(
                list_of_cam_matrices[j], axis=0
            )

        pmm_flag = True

    num_matrices = len(list_of_cam_matrices)
    num_examples = None

    for j in range(num_matrices):
        if list_of_cam_matrices[j] is None:
            continue

        num_examples = list_of_cam_matrices[j].shape[0]
        this_num_spatial_dim = len(list_of_cam_matrices[j].shape) - 1

        if this_num_spatial_dim == 2:
            continue

        error_string = (
            'This script deals with only 2-D class-activation maps.  {0:d}th'
            ' input matrix contains {1:d} spatial dimensions.'
        ).format(j + 1, this_num_spatial_dim)

        raise TypeError(error_string)

    list_of_mask_matrices = [None] * num_matrices

    # Use nested comprehensions (rather than list multiplication) so that
    # each inner list is an independent object.  The original
    # `[[[] * 0] * num_examples] * num_matrices` aliased the same inner
    # lists, so assigning to one [j][i] slot changed every j.
    list_of_polygon_objects = [
        [[] for _ in range(num_examples)] for _ in range(num_matrices)
    ]

    for i in range(num_examples):
        for j in range(num_matrices):
            if list_of_cam_matrices[j] is None:
                continue

            this_min_class_activation = numpy.percentile(
                list_of_cam_matrices[j][i, ...], percentile_threshold
            )
            this_min_class_activation = max([
                this_min_class_activation, min_class_activation
            ])

            print((
                'Creating mask for {0:d}th example and {1:d}th '
                'class-activation matrix, with threshold = {2:.3e}...'
            ).format(
                i + 1, j + 1, this_min_class_activation
            ))

            this_mask_matrix = (
                list_of_cam_matrices[j][i, ...] >= this_min_class_activation
            )

            print('{0:d} of {1:d} grid points are inside mask.\n'.format(
                numpy.sum(this_mask_matrix.astype(int)),
                this_mask_matrix.size
            ))

            list_of_polygon_objects[j][i] = _mask_to_polygons(
                this_mask_matrix)

            this_mask_matrix = numpy.expand_dims(this_mask_matrix, axis=0)

            if list_of_mask_matrices[j] is None:
                list_of_mask_matrices[j] = copy.deepcopy(this_mask_matrix)
            else:
                list_of_mask_matrices[j] = numpy.concatenate(
                    (list_of_mask_matrices[j], this_mask_matrix), axis=0
                )

    if pmm_flag:
        for j in range(len(list_of_mask_matrices)):
            if list_of_mask_matrices[j] is None:
                continue

            list_of_mask_matrices[j] = list_of_mask_matrices[j][0, ...]

    region_dict = {
        gradcam.MASK_MATRICES_KEY: list_of_mask_matrices,
        gradcam.POLYGON_OBJECTS_KEY: list_of_polygon_objects,
        gradcam.PERCENTILE_THRESHOLD_KEY: percentile_threshold,
        gradcam.MIN_CLASS_ACTIVATION_KEY: min_class_activation
    }

    if output_file_name in ['', 'None']:
        output_file_name = input_gradcam_file_name

    print('Writing regions of interest to: "{0:s}"...'.format(
        output_file_name))

    gradcam.add_regions_to_file(
        input_file_name=input_gradcam_file_name,
        output_file_name=output_file_name, region_dict=region_dict)

def _run(evaluation_file_names, line_styles, line_colour_strings,
         set_descriptions_verbose, confidence_level, use_log_scale,
         plot_by_height, output_dir_name):
    """Plots model evaluation.

    This is effectively the main method.

    :param evaluation_file_names: See documentation at top of file.
    :param line_styles: Same.
    :param line_colour_strings: Same.
    :param set_descriptions_verbose: Same.
    :param confidence_level: Same.
    :param use_log_scale: Same.
    :param plot_by_height: Same.
    :param output_dir_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if confidence_level < 0:
        confidence_level = None

    if confidence_level is not None:
        error_checking.assert_is_geq(confidence_level, 0.9)
        error_checking.assert_is_less_than(confidence_level, 1.)

    num_evaluation_sets = len(evaluation_file_names)
    expected_dim = numpy.array([num_evaluation_sets], dtype=int)

    error_checking.assert_is_string_list(line_styles)
    error_checking.assert_is_numpy_array(
        numpy.array(line_styles), exact_dimensions=expected_dim)

    error_checking.assert_is_string_list(set_descriptions_verbose)
    error_checking.assert_is_numpy_array(
        numpy.array(set_descriptions_verbose),
        exact_dimensions=expected_dim)

    set_descriptions_verbose = [
        s.replace('_', ' ') for s in set_descriptions_verbose
    ]
    set_descriptions_abbrev = [
        s.lower().replace(' ', '-') for s in set_descriptions_verbose
    ]

    error_checking.assert_is_string_list(line_colour_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(line_colour_strings), exact_dimensions=expected_dim)
    line_colours = [
        numpy.fromstring(s, dtype=float, sep='_') / 255
        for s in line_colour_strings
    ]

    for i in range(num_evaluation_sets):
        error_checking.assert_is_numpy_array(
            line_colours[i], exact_dimensions=numpy.array([3], dtype=int))
        error_checking.assert_is_geq_numpy_array(line_colours[i], 0.)
        error_checking.assert_is_leq_numpy_array(line_colours[i], 1.)

    # Read files.
    evaluation_tables_xarray = [xarray.Dataset()] * num_evaluation_sets
    prediction_dicts = [dict()] * num_evaluation_sets

    for i in range(num_evaluation_sets):
        print('Reading data from: "{0:s}"...'.format(
            evaluation_file_names[i]))
        evaluation_tables_xarray[i] = evaluation.read_file(
            evaluation_file_names[i])

        this_prediction_file_name = (
            evaluation_tables_xarray[i].attrs[
                evaluation.PREDICTION_FILE_KEY]
        )

        print('Reading data from: "{0:s}"...'.format(
            this_prediction_file_name))
        prediction_dicts[i] = prediction_io.read_file(
            this_prediction_file_name)

    model_file_name = (
        evaluation_tables_xarray[0].attrs[evaluation.MODEL_FILE_KEY]
    )
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)

    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]
    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY]
    )
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY]
    )
    heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

    try:
        aux_target_names = evaluation_tables_xarray[0].coords[
            evaluation.AUX_TARGET_FIELD_DIM].values
    except Exception:
        aux_target_names = []

    num_scalar_targets = len(scalar_target_names)
    num_vector_targets = len(vector_target_names)
    num_heights = len(heights_m_agl)
    num_aux_targets = len(aux_target_names)

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY: scalar_target_names,
        example_utils.VECTOR_TARGET_NAMES_KEY: vector_target_names,
        example_utils.HEIGHTS_KEY: heights_m_agl,
        example_utils.SCALAR_PREDICTOR_NAMES_KEY:
            generator_option_dict[neural_net.SCALAR_PREDICTOR_NAMES_KEY],
        example_utils.VECTOR_PREDICTOR_NAMES_KEY:
            generator_option_dict[neural_net.VECTOR_PREDICTOR_NAMES_KEY]
    }

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )
    print((
        'Reading training examples (for climatology) from: "{0:s}"...'
    ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict, heights_m_agl=heights_m_agl)
    mean_training_example_dict = normalization.create_mean_example(
        new_example_dict=example_dict,
        training_example_dict=training_example_dict)

    print(SEPARATOR_STRING)

    # Do actual stuff.
    _plot_error_distributions(
        prediction_dicts=prediction_dicts,
        model_metadata_dict=model_metadata_dict,
        aux_target_names=aux_target_names,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    _plot_reliability_by_height(
        evaluation_tables_xarray=evaluation_tables_xarray,
        vector_target_names=vector_target_names,
        heights_m_agl=heights_m_agl,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for this_score_name in list(SCORE_NAME_TO_PROFILE_KEY.keys()):
            _plot_score_profile(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles, line_colours=line_colours,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                target_name=vector_target_names[k],
                score_name=this_score_name, use_log_scale=use_log_scale,
                output_dir_name=output_dir_name)

    print(SEPARATOR_STRING)

    for k in range(num_scalar_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles, line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=scalar_target_names[k],
            output_dir_name=output_dir_name)

    for k in range(num_aux_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles, line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=aux_target_names[k],
            output_dir_name=output_dir_name)

    if not plot_by_height:
        return

    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for j in range(num_heights):
            _plot_attributes_diagram(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles, line_colours=line_colours,
                set_descriptions_abbrev=set_descriptions_abbrev,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                mean_training_example_dict=mean_training_example_dict,
                height_m_agl=heights_m_agl[j],
                target_name=vector_target_names[k],
                output_dir_name=output_dir_name)

        if k != num_vector_targets - 1:
            print(SEPARATOR_STRING)

def get_xy_grid_point_matrices(first_row_in_narr_grid,
                               last_row_in_narr_grid,
                               first_column_in_narr_grid,
                               last_column_in_narr_grid,
                               basemap_object=None):
    """Returns coordinate matrices for a contiguous subset of the NARR grid.

    However, this subset need not be *strictly* a subset.  In other words,
    the "subset" could be the full NARR grid.

    This method generates different x- and y-coordinates than
    `nwp_model_utils.get_xy_grid_point_matrices`, because (like
    `mpl_toolkits.basemap.Basemap`) this method assumes that false easting
    and northing are zero.

    :param first_row_in_narr_grid: Row 0 in the subgrid is row
        `first_row_in_narr_grid` in the full NARR grid.
    :param last_row_in_narr_grid: Last row (index -1) in the subgrid is row
        `last_row_in_narr_grid` in the full NARR grid.
    :param first_column_in_narr_grid: Column 0 in the subgrid is column
        `first_column_in_narr_grid` in the full NARR grid.
    :param last_column_in_narr_grid: Last column (index -1) in the subgrid
        is column `last_column_in_narr_grid` in the full NARR grid.
    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap`
        created for the NARR grid.  If you don't have one, no big deal --
        leave this argument empty.
    :return: grid_point_x_matrix_metres: M-by-N numpy array of
        x-coordinates.
    :return: grid_point_y_matrix_metres: M-by-N numpy array of
        y-coordinates.
    """

    error_checking.assert_is_integer(first_row_in_narr_grid)
    error_checking.assert_is_geq(first_row_in_narr_grid, 0)
    error_checking.assert_is_integer(last_row_in_narr_grid)
    error_checking.assert_is_greater(
        last_row_in_narr_grid, first_row_in_narr_grid)
    error_checking.assert_is_less_than(
        last_row_in_narr_grid, NUM_ROWS_IN_NARR_GRID)

    error_checking.assert_is_integer(first_column_in_narr_grid)
    error_checking.assert_is_geq(first_column_in_narr_grid, 0)
    error_checking.assert_is_integer(last_column_in_narr_grid)
    error_checking.assert_is_greater(
        last_column_in_narr_grid, first_column_in_narr_grid)
    error_checking.assert_is_less_than(
        last_column_in_narr_grid, NUM_COLUMNS_IN_NARR_GRID)

    latitude_matrix_deg, longitude_matrix_deg = (
        nwp_model_utils.get_latlng_grid_point_matrices(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    )

    latitude_matrix_deg = latitude_matrix_deg[
        first_row_in_narr_grid:(last_row_in_narr_grid + 1),
        first_column_in_narr_grid:(last_column_in_narr_grid + 1)
    ]
    longitude_matrix_deg = longitude_matrix_deg[
        first_row_in_narr_grid:(last_row_in_narr_grid + 1),
        first_column_in_narr_grid:(last_column_in_narr_grid + 1)
    ]

    if basemap_object is None:
        standard_latitudes_deg, central_longitude_deg = (
            nwp_model_utils.get_projection_params(
                nwp_model_utils.NARR_MODEL_NAME)
        )

        projection_object = projections.init_lambert_conformal_projection(
            standard_latitudes_deg=standard_latitudes_deg,
            central_longitude_deg=central_longitude_deg)

        grid_point_x_matrix_metres, grid_point_y_matrix_metres = (
            projections.project_latlng_to_xy(
                latitude_matrix_deg, longitude_matrix_deg,
                projection_object=projection_object,
                false_northing_metres=0., false_easting_metres=0.)
        )
    else:
        grid_point_x_matrix_metres, grid_point_y_matrix_metres = (
            basemap_object(longitude_matrix_deg, latitude_matrix_deg)
        )

    return grid_point_x_matrix_metres, grid_point_y_matrix_metres

def get_echo_tops(
        unix_time_sec, spc_date_string, top_directory_name,
        critical_reflectivity_dbz,
        top_height_to_consider_m_asl=
        DEFAULT_TOP_INPUT_HEIGHT_FOR_ECHO_TOPS_M_ASL,
        lowest_refl_to_consider_dbz=None):
    """Finds echo top at each horizontal location.

    "Echo top" is the max height with reflectivity >= critical reflectivity.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)

    :param unix_time_sec: Valid time.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_directory_name: Name of top-level directory with MYRORSS files.
    :param critical_reflectivity_dbz: Critical reflectivity (used to define
        echo top).
    :param top_height_to_consider_m_asl: Top height level to consider (metres
        above sea level).
    :param lowest_refl_to_consider_dbz: Lowest reflectivity to consider in
        echo-top calculations.  If None, will consider all reflectivities.
    :return: echo_top_matrix_m_asl: M-by-N matrix of echo tops (metres above
        sea level).  Latitude increases down each column, and longitude
        increases to the right along each row.
    :return: grid_point_latitudes_deg: length-M numpy array with latitudes
        (deg N) of grid points, sorted in ascending order.
    :return: grid_point_longitudes_deg: length-N numpy array with longitudes
        (deg E) of grid points, sorted in ascending order.
    :return: metadata_dict: Dictionary created by
        `myrorss_and_mrms_io.read_metadata_from_raw_file` for column-max
        reflectivity.
    """

    error_checking.assert_is_greater(critical_reflectivity_dbz, 0.)
    error_checking.assert_is_greater(top_height_to_consider_m_asl, 0)
    top_height_to_consider_m_asl = int(
        numpy.round(top_height_to_consider_m_asl))

    if lowest_refl_to_consider_dbz is None:
        lowest_refl_to_consider_dbz = 0.
    error_checking.assert_is_less_than(
        lowest_refl_to_consider_dbz, critical_reflectivity_dbz)

    grid_point_heights_m_asl = radar_utils.get_valid_heights(
        data_source=radar_utils.MYRORSS_SOURCE_ID,
        field_name=radar_utils.REFL_NAME)
    grid_point_heights_m_asl = grid_point_heights_m_asl[
        grid_point_heights_m_asl <= top_height_to_consider_m_asl]

    column_max_refl_file_name = myrorss_and_mrms_io.find_raw_file(
        unix_time_sec=unix_time_sec, spc_date_string=spc_date_string,
        field_name=radar_utils.REFL_COLUMN_MAX_NAME,
        data_source=radar_utils.MYRORSS_SOURCE_ID,
        top_directory_name=top_directory_name)

    num_grid_heights = len(grid_point_heights_m_asl)
    single_height_refl_file_names = [''] * num_grid_heights

    for k in range(num_grid_heights):
        single_height_refl_file_names[k] = myrorss_and_mrms_io.find_raw_file(
            unix_time_sec=unix_time_sec, spc_date_string=spc_date_string,
            field_name=radar_utils.REFL_NAME,
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            top_directory_name=top_directory_name,
            height_m_asl=grid_point_heights_m_asl[k])

    print('Reading "{0:s}" for echo-top calculation...'.format(
        column_max_refl_file_name))

    metadata_dict = myrorss_and_mrms_io.read_metadata_from_raw_file(
        column_max_refl_file_name, data_source=radar_utils.MYRORSS_SOURCE_ID)

    this_sparse_grid_table = (
        myrorss_and_mrms_io.read_data_from_sparse_grid_file(
            column_max_refl_file_name,
            field_name_orig=metadata_dict[
                myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            sentinel_values=metadata_dict[radar_utils.SENTINEL_VALUE_COLUMN])
    )

    (column_max_refl_matrix_dbz, grid_point_latitudes_deg,
     grid_point_longitudes_deg
    ) = radar_s2f.sparse_to_full_grid(this_sparse_grid_table, metadata_dict)

    num_grid_rows = len(grid_point_latitudes_deg)
    num_grid_columns = len(grid_point_longitudes_deg)
    linear_indices_to_consider = numpy.where(
        numpy.reshape(
            column_max_refl_matrix_dbz, num_grid_rows * num_grid_columns)
        >= critical_reflectivity_dbz
    )[0]

    print((
        'Echo-top calculation is needed at only {0:d}/{1:d} horizontal grid '
        'points!'
    ).format(
        len(linear_indices_to_consider), num_grid_rows * num_grid_columns
    ))

    echo_top_matrix_m_asl = numpy.full(
        (num_grid_rows, num_grid_columns), numpy.nan)
    num_horiz_points_to_consider = len(linear_indices_to_consider)

    if num_horiz_points_to_consider == 0:
        return (numpy.flipud(echo_top_matrix_m_asl),
                grid_point_latitudes_deg[::-1], grid_point_longitudes_deg,
                metadata_dict)

    grid_rows_to_consider, grid_columns_to_consider = numpy.unravel_index(
        linear_indices_to_consider, (num_grid_rows, num_grid_columns))
    reflectivity_matrix_dbz = numpy.full(
        (num_grid_heights, num_horiz_points_to_consider), numpy.nan)

    for k in range(num_grid_heights):
        print('Reading "{0:s}" for echo-top calculation...'.format(
            single_height_refl_file_names[k]))

        this_metadata_dict = myrorss_and_mrms_io.read_metadata_from_raw_file(
            single_height_refl_file_names[k],
            data_source=radar_utils.MYRORSS_SOURCE_ID)

        this_sparse_grid_table = (
            myrorss_and_mrms_io.read_data_from_sparse_grid_file(
                single_height_refl_file_names[k],
                field_name_orig=this_metadata_dict[
                    myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
                data_source=radar_utils.MYRORSS_SOURCE_ID,
                sentinel_values=this_metadata_dict[
                    radar_utils.SENTINEL_VALUE_COLUMN])
        )

        this_reflectivity_matrix_dbz, _, _ = radar_s2f.sparse_to_full_grid(
            this_sparse_grid_table, this_metadata_dict,
            ignore_if_below=lowest_refl_to_consider_dbz)
        reflectivity_matrix_dbz[k, :] = this_reflectivity_matrix_dbz[
            grid_rows_to_consider, grid_columns_to_consider]

    print('Computing echo tops at the {0:d} horizontal grid points...'.format(
        num_horiz_points_to_consider))

    for i in range(num_horiz_points_to_consider):
        echo_top_matrix_m_asl[
            grid_rows_to_consider[i], grid_columns_to_consider[i]
        ] = radar_utils.get_echo_top_single_column(
            reflectivities_dbz=reflectivity_matrix_dbz[:, i],
            heights_m_asl=grid_point_heights_m_asl,
            critical_reflectivity_dbz=critical_reflectivity_dbz)

    return (numpy.flipud(echo_top_matrix_m_asl),
            grid_point_latitudes_deg[::-1], grid_point_longitudes_deg,
            metadata_dict)
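# --- Usage sketch (not part of the library) ---
# A minimal, hypothetical example of computing 40-dBZ echo tops for one radar
# scan.  The MYRORSS directory, SPC date, and valid time below are made-up
# example values, not real data.

import numpy

(echo_top_matrix_m_asl, grid_point_latitudes_deg, grid_point_longitudes_deg,
 metadata_dict
) = get_echo_tops(
    unix_time_sec=1467990000,  # arbitrary example valid time
    spc_date_string='20160708',
    top_directory_name='/data/myrorss',  # hypothetical path
    critical_reflectivity_dbz=40.)

# NaN entries mark grid points where column-max reflectivity never reached
# 40 dBZ, so no echo top is defined there.
print(numpy.nanmax(echo_top_matrix_m_asl))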
def create_paneled_figure(
        num_rows, num_columns,
        figure_width_inches=DEFAULT_FIGURE_WIDTH_INCHES,
        figure_height_inches=DEFAULT_FIGURE_HEIGHT_INCHES,
        horizontal_spacing=0.075, vertical_spacing=0.,
        shared_x_axis=False, shared_y_axis=False, keep_aspect_ratio=True):
    """Creates paneled figure.

    This method only initializes the panels.  It does not plot anything.

    J = number of panel rows
    K = number of panel columns

    :param num_rows: J in the above discussion.
    :param num_columns: K in the above discussion.
    :param figure_width_inches: Width of the entire figure (including all
        panels).
    :param figure_height_inches: Height of the entire figure (including all
        panels).
    :param horizontal_spacing: Spacing (as a fraction of average panel width)
        between adjacent panel columns.
    :param vertical_spacing: Spacing (as a fraction of average panel height)
        between adjacent panel rows.
    :param shared_x_axis: Boolean flag.  If True, all panels will share the
        same x-axis.
    :param shared_y_axis: Boolean flag.  If True, all panels will share the
        same y-axis.
    :param keep_aspect_ratio: Boolean flag.  If True, the aspect ratio of each
        panel will be preserved (i.e., will reflect the aspect ratio of the
        data plotted therein).
    :return: figure_object: Figure handle (instance of
        `matplotlib.figure.Figure`).
    :return: axes_object_matrix: J-by-K numpy array of axes handles (instances
        of `matplotlib.axes._subplots.AxesSubplot`).
    """

    error_checking.assert_is_geq(horizontal_spacing, 0.)
    error_checking.assert_is_less_than(horizontal_spacing, 1.)
    error_checking.assert_is_geq(vertical_spacing, 0.)
    error_checking.assert_is_less_than(vertical_spacing, 1.)
    error_checking.assert_is_boolean(shared_x_axis)
    error_checking.assert_is_boolean(shared_y_axis)
    error_checking.assert_is_boolean(keep_aspect_ratio)

    figure_object, axes_object_matrix = pyplot.subplots(
        num_rows, num_columns, sharex=shared_x_axis, sharey=shared_y_axis,
        figsize=(figure_width_inches, figure_height_inches)
    )

    if num_rows == num_columns == 1:
        axes_object_matrix = numpy.full(
            (1, 1), axes_object_matrix, dtype=object)
    elif num_rows == 1 or num_columns == 1:
        axes_object_matrix = numpy.reshape(
            axes_object_matrix, (num_rows, num_columns))

    # In `subplots_adjust`, `wspace` controls spacing between panel columns
    # (horizontal) and `hspace` controls spacing between panel rows (vertical).
    pyplot.subplots_adjust(
        left=0.02, bottom=0.02, right=0.98, top=0.95,
        wspace=horizontal_spacing, hspace=vertical_spacing)

    if not keep_aspect_ratio:
        return figure_object, axes_object_matrix

    for i in range(num_rows):
        for j in range(num_columns):
            axes_object_matrix[i, j].set(aspect='equal')

    return figure_object, axes_object_matrix
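# --- Usage sketch (not part of the library) ---
# A minimal, hypothetical example of calling `create_paneled_figure` to set up
# a 2-by-3 panel grid, label each panel, and save the (otherwise empty)
# figure.  The output file name is a made-up example.

from matplotlib import pyplot

figure_object, axes_object_matrix = create_paneled_figure(
    num_rows=2, num_columns=3, keep_aspect_ratio=False)

# `axes_object_matrix` is always 2-D (J-by-K), so panels can be indexed
# uniformly even when J = 1 or K = 1.
for i in range(2):
    for j in range(3):
        axes_object_matrix[i, j].set_title(
            'Panel ({0:d}, {1:d})'.format(i, j))

figure_object.savefig('paneled_figure.jpg', dpi=300)  # hypothetical file name
pyplot.close(figure_object)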