def check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Validates predicted class probabilities and their observed labels.

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats, all in
        0...1.  class_probability_matrix[i, k] is the predicted probability
        that the [i]th example belongs to the [k]th class.
    :param observed_labels: length-P numpy array of integers, all in
        0...(K - 1).  If observed_labels[i] = k, the [i]th example truly
        belongs to the [k]th class.
    """

    # Probabilities: 2-D array with every value in [0, 1].
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    num_pairs, num_classes = class_probability_matrix.shape
    expected_label_dimensions = numpy.array([num_pairs])

    # Labels: one integer per evaluation pair, each a valid class index.
    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_label_dimensions)
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes)
def _check_input_events(event_x_coords_metres, event_y_coords_metres,
                        integer_event_ids):
    """Validates event inputs to `count_events_on_equidistant_grid`.

    The x-coordinates determine the number of events; the y-coordinates (and
    the IDs, if given) must have exactly that length.

    :param event_x_coords_metres: See doc for
        `count_events_on_equidistant_grid`.
    :param event_y_coords_metres: Same.
    :param integer_event_ids: Same.  If None, the IDs are not checked.
    """

    error_checking.assert_is_numpy_array_without_nan(event_x_coords_metres)
    error_checking.assert_is_numpy_array(
        event_x_coords_metres, num_dimensions=1)

    event_count = len(event_x_coords_metres)

    error_checking.assert_is_numpy_array_without_nan(event_y_coords_metres)
    error_checking.assert_is_numpy_array(
        event_y_coords_metres, exact_dimensions=numpy.array([event_count]))

    if integer_event_ids is None:
        return

    error_checking.assert_is_integer_numpy_array(integer_event_ids)
    error_checking.assert_is_numpy_array(
        integer_event_ids, exact_dimensions=numpy.array([event_count]))
def extract_radar_grid_points(field_matrix, row_indices, column_indices):
    """Pulls values at the given (row, column) points out of a radar field.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)
    P = number of points to extract

    :param field_matrix: M-by-N numpy array with values of a single radar
        field.
    :param row_indices: length-P numpy array with row indices of points to
        extract (integers in 0...[M - 1]).
    :param column_indices: length-P numpy array with column indices of points
        to extract (integers in 0...[N - 1]).
    :return: extracted_values: length-P numpy array of values extracted from
        field_matrix.
    """

    error_checking.assert_is_real_numpy_array(field_matrix)
    error_checking.assert_is_numpy_array(field_matrix, num_dimensions=2)

    row_count, column_count = field_matrix.shape

    # Row indices must be valid along the first axis.
    error_checking.assert_is_integer_numpy_array(row_indices)
    error_checking.assert_is_geq_numpy_array(row_indices, 0)
    error_checking.assert_is_less_than_numpy_array(row_indices, row_count)

    # Column indices must be valid along the second axis.
    error_checking.assert_is_integer_numpy_array(column_indices)
    error_checking.assert_is_geq_numpy_array(column_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        column_indices, column_count)

    extracted_values = field_matrix[row_indices, column_indices]
    return extracted_values
def write_file(
        pickle_file_name, activation_matrix, storm_ids, storm_times_unix_sec,
        model_file_name, component_type_string, target_class=None,
        layer_name=None, neuron_index_matrix=None, channel_indices=None):
    """Writes activations to Pickle file.

    Two objects are pickled to the file, in this order: the activation matrix,
    then a dictionary with all metadata.

    E = number of examples (storm objects)
    C = number of model components (classes, neurons, or channels) for which
        activations were computed

    :param pickle_file_name: Path to output file.
    :param activation_matrix: E-by-C numpy array of activations, where
        activation_matrix[i, j] = activation of the [j]th model component for
        the [i]th example.
    :param storm_ids: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model.
    :param component_type_string: See doc for `check_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_index_matrix: Same.
    :param channel_indices: Same.
    """

    # `check_metadata` validates the component description and returns C.
    num_components = check_metadata(
        component_type_string=component_type_string,
        target_class=target_class, layer_name=layer_name,
        neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)

    error_checking.assert_is_string(model_file_name)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), num_dimensions=1)
    num_examples = len(storm_ids)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=numpy.array([num_examples]))

    error_checking.assert_is_numpy_array_without_nan(activation_matrix)
    error_checking.assert_is_numpy_array(
        activation_matrix,
        exact_dimensions=numpy.array([num_examples, num_components]))

    metadata_dict = {
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        NEURON_INDICES_KEY: neuron_index_matrix,
        CHANNEL_INDICES_KEY: channel_indices,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(activation_matrix, pickle_file_handle)
        pickle.dump(metadata_dict, pickle_file_handle)
def check_metadata(component_type_string, target_class=None, layer_name=None,
                   ideal_activation=None, neuron_indices=None,
                   channel_index=None):
    """Error-checks metadata for saliency calculations.

    Only the arguments relevant to the given component type are checked.

    :param component_type_string: Component type (must be accepted by
        `model_interpretation.check_component_type`).
    :param target_class: See doc for `get_saliency_maps_for_class_activation`.
    :param layer_name: See doc for `get_saliency_maps_for_neuron_activation` or
        `get_saliency_maps_for_channel_activation`.
    :param ideal_activation: Same.
    :param neuron_indices: See doc for
        `get_saliency_maps_for_neuron_activation`.
    :param channel_index: See doc for `get_saliency_maps_for_class_activation`.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['component_type_string']: See input doc.
    metadata_dict['target_class']: Same.
    metadata_dict['layer_name']: Same.
    metadata_dict['ideal_activation']: Same.
    metadata_dict['neuron_indices']: Same.
    metadata_dict['channel_index']: Same.
    """

    model_interpretation.check_component_type(component_type_string)

    is_class_component = (
        component_type_string ==
        model_interpretation.CLASS_COMPONENT_TYPE_STRING
    )
    is_neuron_component = (
        component_type_string ==
        model_interpretation.NEURON_COMPONENT_TYPE_STRING
    )
    is_channel_component = (
        component_type_string ==
        model_interpretation.CHANNEL_COMPONENT_TYPE_STRING
    )

    if is_class_component:
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    # Neurons and channels both live in a named layer and may have an ideal
    # activation.
    if is_neuron_component or is_channel_component:
        error_checking.assert_is_string(layer_name)

        if ideal_activation is not None:
            error_checking.assert_is_greater(ideal_activation, 0.)

    if is_neuron_component:
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if is_channel_component:
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)

    metadata_dict = {
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        IDEAL_ACTIVATION_KEY: ideal_activation,
        NEURON_INDICES_KEY: neuron_indices,
        CHANNEL_INDEX_KEY: channel_index
    }
    return metadata_dict
def check_component_metadata(
        component_type_string, target_class=None, layer_name=None,
        neuron_indices=None, channel_index=None):
    """Error-checks the description of one model component.

    Only the arguments relevant to the given component type are checked.

    :param component_type_string: Component type (must be accepted by
        `check_component_type`).
    :param target_class: [used only if component_type_string = "class"]
        Target class.  Integer from 0...(K - 1), where K = number of classes.
    :param layer_name: [used only if component_type_string = "neuron" or
        "channel"] Name of layer containing neuron or channel.
    :param neuron_indices: [used only if component_type_string = "neuron"]
        1-D numpy array with indices of neuron.
    :param channel_index: [used only if component_type_string = "channel"]
        Index of channel.
    """

    check_component_type(component_type_string)

    is_neuron = component_type_string == NEURON_COMPONENT_TYPE_STRING
    is_channel = component_type_string == CHANNEL_COMPONENT_TYPE_STRING

    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if is_neuron or is_channel:
        error_checking.assert_is_string(layer_name)

    if is_neuron:
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if is_channel:
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)
def write_ids_and_times(full_id_strings, storm_times_unix_sec,
                        pickle_file_name):
    """Writes full storm IDs and valid times (minimal metadata) to Pickle file.

    A single dictionary, holding the IDs and the times, is pickled to the
    file.

    N = number of storm objects

    :param full_id_strings: length-N list of full IDs.
    :param storm_times_unix_sec: length-N numpy array of valid times.
    :param pickle_file_name: Path to output file.
    """

    error_checking.assert_is_string_list(full_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_id_strings), num_dimensions=1)
    num_storm_objects = len(full_id_strings)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec,
        exact_dimensions=numpy.array([num_storm_objects], dtype=int)
    )

    metadata_dict = {
        FULL_IDS_KEY: full_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(metadata_dict, pickle_file_handle)
def _check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Error-checks forecast probabilities against observed labels.

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats in 0...1.
        class_probability_matrix[i, k] is the predicted probability that the
        [i]th example belongs to the [k]th class.
    :param observed_labels: length-P numpy array of integers in 0...(K - 1).
        If observed_labels[i] = k, the [i]th example truly belongs to the
        [k]th class.
    """

    # TODO(thunderhoser): This method is duplicated from evaluation_utils.py.
    # I can't just import evaluation_utils.py, because this leads to a
    # circular import chain.  The answer is to put this method somewhere more
    # general.

    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    pair_count = class_probability_matrix.shape[0]
    class_count = class_probability_matrix.shape[1]
    expected_dimensions = numpy.array([pair_count])

    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_dimensions)
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, class_count)
def _check_model_fields(field_matrix, field_name, pressure_level_pascals,
                        valid_times_unix_sec):
    """Error-checks one model field and its metadata.

    The expected grid size (M, N) is taken from
    `nwp_model_utils.get_grid_dimensions` for the NARR model.

    M = number of rows (unique grid-point y-coordinates)
    N = number of columns (unique grid-point x-coordinates)
    T = number of time steps

    :param field_matrix: T-by-M-by-N numpy array with values of a single field
        (atmospheric variable).
    :param field_name: Field name in GewitterGefahr format.
    :param pressure_level_pascals: Pressure level (integer Pascals).
    :param valid_times_unix_sec: length-T numpy array of valid times.
    """

    check_field_name(field_name, require_standard=False)
    error_checking.assert_is_integer(pressure_level_pascals)

    error_checking.assert_is_integer_numpy_array(valid_times_unix_sec)
    error_checking.assert_is_numpy_array(
        valid_times_unix_sec, num_dimensions=1)
    time_count = len(valid_times_unix_sec)

    row_count, column_count = nwp_model_utils.get_grid_dimensions(
        model_name=nwp_model_utils.NARR_MODEL_NAME)
    expected_dimensions = numpy.array([time_count, row_count, column_count])

    error_checking.assert_is_real_numpy_array(field_matrix)
    error_checking.assert_is_numpy_array(field_matrix, num_dimensions=3)
    error_checking.assert_is_numpy_array(
        field_matrix, exact_dimensions=expected_dimensions)
def check_metadata(layer_name, neuron_indices, ideal_activation,
                   num_iterations, learning_rate, l2_weight):
    """Error-checks metadata (target neuron and gradient-descent settings).

    :param layer_name: Name of layer with relevant neuron.
    :param neuron_indices: 1-D numpy array with indices (non-negative
        integers) of relevant neuron.  Must have length D - 1, where D =
        number of dimensions in layer output.  The first dimension is the
        batch dimension, which always has length `None` in Keras.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (neuron_activation - ideal_activation)**2.
    :param num_iterations: Number of iterations for gradient descent
        (positive integer).
    :param learning_rate: Learning rate for gradient descent (must be greater
        than 0 and less than 1).
    :param l2_weight: L2 weight (penalty for difference between initial and
        final predictor matrix) in loss function.  Must be >= 0.
    """

    # Target neuron.
    error_checking.assert_is_string(layer_name)
    error_checking.assert_is_integer_numpy_array(neuron_indices)
    error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
    error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)
    error_checking.assert_is_not_nan(ideal_activation)

    # Optimization settings.
    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)
    error_checking.assert_is_geq(l2_weight, 0.)
def _check_frontal_image(image_matrix, assert_binary=False):
    """Error-checks a frontal image (binary or ternary).

    M = number of grid rows (unique y-coordinates at grid points)
    N = number of grid columns (unique x-coordinates at grid points)

    :param image_matrix: M-by-N numpy array of integers.  May be either binary
        (2-class) or ternary (3-class).  If binary, all elements must be in
        {0, 1} and element [i, j] indicates whether or not a front intersects
        grid cell [i, j].  If ternary, elements must be in `VALID_INTEGER_IDS`
        and element [i, j] indicates the type of front (warm, cold, or none)
        intersecting grid cell [i, j].
    :param assert_binary: Boolean flag.  If True and image is non-binary, this
        method will error out.
    """

    error_checking.assert_is_numpy_array(image_matrix, num_dimensions=2)
    error_checking.assert_is_integer_numpy_array(image_matrix)

    min_allowed_id = numpy.min(VALID_INTEGER_IDS)
    error_checking.assert_is_geq_numpy_array(image_matrix, min_allowed_id)

    # The upper bound is the only thing that depends on `assert_binary`.
    if assert_binary:
        max_allowed_id = ANY_FRONT_INTEGER_ID
    else:
        max_allowed_id = numpy.max(VALID_INTEGER_IDS)

    error_checking.assert_is_leq_numpy_array(image_matrix, max_allowed_id)
def _check_reflectivity_heights(heights_m_agl):
    """Verifies that every requested reflectivity height is a valid one.

    :param heights_m_agl: 1-D numpy array of heights (integer metres above
        ground level).
    :raises: ValueError: if any element of heights_m_agl is invalid.
    """

    error_checking.assert_is_integer_numpy_array(heights_m_agl)
    error_checking.assert_is_numpy_array(heights_m_agl, num_dimensions=1)

    # Data source doesn't matter in this method call (i.e., replacing
    # MYRORSS_SOURCE_ID with MRMS_SOURCE_ID would have no effect).
    allowed_heights_m_agl = get_valid_heights_for_field(
        REFL_NAME, MYRORSS_SOURCE_ID)

    for height_m_agl in heights_m_agl:
        if height_m_agl not in allowed_heights_m_agl:
            # Fail on the first invalid height found.
            raise ValueError(
                '\n\n{0:s}\n\nValid reflectivity heights (metres AGL, listed '
                'above) do not include {1:s} m AGL.'.format(
                    str(allowed_heights_m_agl), str(height_m_agl))
            )
def check_metadata(activation_layer_name, vector_output_layer_name,
                   output_neuron_indices, ideal_activation):
    """Error-checks metadata (layer names, output neuron, ideal activation).

    :param activation_layer_name: Name of activation layer.
    :param vector_output_layer_name: Name of layer that outputs predictions
        for vector target variables.
    :param output_neuron_indices: length-2 numpy array with indices
        (non-negative integers) of output neuron (height index, channel
        index).  Class activation will be computed with respect to the output
        of this neuron.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (output_neuron_activation - ideal_activation)**2.
    """

    error_checking.assert_is_string(activation_layer_name)
    error_checking.assert_is_string(vector_output_layer_name)

    error_checking.assert_is_integer_numpy_array(output_neuron_indices)
    error_checking.assert_is_geq_numpy_array(output_neuron_indices, 0)

    expected_dimensions = numpy.array([2], dtype=int)
    error_checking.assert_is_numpy_array(
        output_neuron_indices, exact_dimensions=expected_dimensions)

    error_checking.assert_is_not_nan(ideal_activation)
def dimensions_to_grid_id(grid_dimensions):
    """Finds the known grid whose dimensions match the given ones.

    The NARR grid is tried first, then each RUC grid in `RUC_GRID_IDS`.

    :param grid_dimensions: 1-D numpy array with [num_rows, num_columns].
        Both values must be integers greater than 1.
    :return: grid_id: String ID for grid.
    :raises: ValueError: if dimensions do not match a known grid.
    """

    expected_array_shape = numpy.array([2])
    error_checking.assert_is_numpy_array(
        grid_dimensions, exact_dimensions=expected_array_shape)
    error_checking.assert_is_integer_numpy_array(grid_dimensions)
    error_checking.assert_is_greater_numpy_array(grid_dimensions, 1)

    narr_dimensions = get_grid_dimensions(NARR_MODEL_NAME)
    if numpy.array_equal(narr_dimensions, grid_dimensions):
        return ID_FOR_221GRID

    for candidate_grid_id in RUC_GRID_IDS:
        ruc_dimensions = get_grid_dimensions(RUC_MODEL_NAME, candidate_grid_id)
        if numpy.array_equal(ruc_dimensions, grid_dimensions):
            return candidate_grid_id

    error_string = (
        'Dimensions ({0:s} rows x {1:s} columns) do not match a known grid.'
    ).format(str(grid_dimensions[0]), str(grid_dimensions[1]))

    raise ValueError(error_string)
def _check_input_data_for_learning(
        input_table, feature_names, target_name=None):
    """Error-checks input data for a machine-learning model.

    :param input_table: pandas DataFrame, where each row is one example (data
        point).
    :param feature_names: 1-D list with names of features (predictor
        variables).  Each feature must be a column of input_table.
    :param target_name: Name of target variable (predictand).  Must be a
        column of input_table.  All values must be 0 or 1.  If None, only the
        feature columns are checked.
    """

    error_checking.assert_is_string_list(feature_names)
    error_checking.assert_is_numpy_array(
        numpy.array(feature_names), num_dimensions=1)

    if target_name is not None:
        error_checking.assert_is_string(target_name)

        required_column_names = feature_names + [target_name]
        error_checking.assert_columns_in_dataframe(
            input_table, required_column_names)

        # Target must be binary (integers in {0, 1}).
        these_target_values = input_table[target_name].values
        error_checking.assert_is_integer_numpy_array(these_target_values)
        error_checking.assert_is_geq_numpy_array(these_target_values, 0)
        error_checking.assert_is_leq_numpy_array(these_target_values, 1)
    else:
        error_checking.assert_columns_in_dataframe(input_table, feature_names)
def _plot_inset_histogram_for_attributes_diagram(
        figure_object, num_examples_by_bin,
        bar_face_colour=DEFAULT_HISTOGRAM_FACE_COLOUR,
        bar_edge_colour=DEFAULT_HISTOGRAM_EDGE_COLOUR,
        bar_edge_width=DEFAULT_HISTOGRAM_EDGE_WIDTH):
    """Plots forecast histogram inset in attributes diagram.

    For more on the attributes diagram, see Hsu and Murphy (1986).

    The histogram shows the fraction of examples in each forecast bin.  The
    bins are equal-width and span 0...1.  The histogram is drawn in a new
    axes object added to `figure_object`.

    B = number of forecast bins

    :param figure_object: Instance of `matplotlib.figure.Figure`.
    :param num_examples_by_bin: length-B numpy array with number of examples
        in each forecast bin (non-negative integers; B must be >= 2).
    :param bar_face_colour: Colour (in any format accepted by
        `matplotlib.colors`) for interior of histogram bars.
    :param bar_edge_colour: Colour for edge of histogram bars.
    :param bar_edge_width: Width for edge of histogram bars.
    """

    error_checking.assert_is_integer_numpy_array(num_examples_by_bin)
    error_checking.assert_is_numpy_array(num_examples_by_bin, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(num_examples_by_bin, 0)

    num_forecast_bins = len(num_examples_by_bin)
    error_checking.assert_is_geq(num_forecast_bins, 2)

    example_frequency_by_bin = (
        num_examples_by_bin.astype(float) / numpy.sum(num_examples_by_bin)
    )

    forecast_bin_edges = numpy.linspace(0., 1., num=num_forecast_bins + 1)
    forecast_bin_width = forecast_bin_edges[1] - forecast_bin_edges[0]
    forecast_bin_centers = forecast_bin_edges[:-1] + forecast_bin_width / 2

    inset_axes_object = figure_object.add_axes([
        INSET_HISTOGRAM_LEFT_EDGE, INSET_HISTOGRAM_BOTTOM_EDGE,
        INSET_HISTOGRAM_WIDTH, INSET_HISTOGRAM_HEIGHT
    ])

    inset_axes_object.bar(
        forecast_bin_centers, example_frequency_by_bin, forecast_bin_width,
        color=plotting_utils.colour_from_numpy_to_tuple(bar_face_colour),
        edgecolor=plotting_utils.colour_from_numpy_to_tuple(bar_edge_colour),
        linewidth=bar_edge_width)

    max_frequency = numpy.max(example_frequency_by_bin)

    max_y_tick_value = rounder.floor_to_nearest(
        1.05 * max_frequency, INSET_HISTOGRAM_Y_TICK_SPACING)
    num_y_ticks = 1 + int(
        numpy.round(max_y_tick_value / INSET_HISTOGRAM_Y_TICK_SPACING)
    )
    y_tick_values = numpy.linspace(0., max_y_tick_value, num=num_y_ticks)

    # Set ticks directly on the inset axes.  `pyplot.xticks` / `pyplot.yticks`
    # act on whatever axes pyplot considers current, so they are not
    # guaranteed to touch `inset_axes_object`.
    inset_axes_object.set_xticks(INSET_HISTOGRAM_X_TICKS)
    inset_axes_object.set_yticks(y_tick_values)

    inset_axes_object.set_xlim(0., 1.)
    inset_axes_object.set_ylim(0., 1.05 * max_frequency)
def write_ensembled_predictions(
        pickle_file_name, class_probability_matrix, valid_times_unix_sec,
        narr_mask_matrix, prediction_dir_name_by_model, model_weights):
    """Writes ensembled predictions to Pickle file.

    An "ensembled prediction" is an ensemble of gridded predictions from two or
    more NFA models.  A single dictionary, holding the probabilities and all
    metadata, is pickled to the file.

    T = number of time steps
    M = number of rows in grid
    N = number of columns in grid
    C = number of classes

    :param pickle_file_name: Path to output file.
    :param class_probability_matrix: T-by-M-by-N-by-C numpy array of class
        probabilities.
    :param valid_times_unix_sec: length-T numpy array of time steps.
    :param narr_mask_matrix: See doc for `write_gridded_predictions`.  Must be
        M-by-N.
    :param prediction_dir_name_by_model: See doc for
        `check_ensemble_metadata`.
    :param model_weights: Same.
    """

    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=4)

    # Mask must match the two spatial dimensions of the probability matrix.
    ml_utils.check_narr_mask(narr_mask_matrix)
    these_expected_dim = numpy.array(
        class_probability_matrix.shape[1:3], dtype=int)
    error_checking.assert_is_numpy_array(
        narr_mask_matrix, exact_dimensions=these_expected_dim)

    # There must be one valid time per time step in the probability matrix.
    error_checking.assert_is_integer_numpy_array(valid_times_unix_sec)
    num_times = class_probability_matrix.shape[0]
    these_expected_dim = numpy.array([num_times], dtype=int)
    error_checking.assert_is_numpy_array(
        valid_times_unix_sec, exact_dimensions=these_expected_dim)

    check_ensemble_metadata(
        prediction_dir_name_by_model=prediction_dir_name_by_model,
        model_weights=model_weights)

    ensemble_dict = {
        CLASS_PROBABILITIES_KEY: class_probability_matrix,
        VALID_TIMES_KEY: valid_times_unix_sec,
        NARR_MASK_KEY: narr_mask_matrix,
        MODEL_DIRECTORIES_KEY: prediction_dir_name_by_model,
        MODEL_WEIGHTS_KEY: model_weights
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(ensemble_dict, pickle_file_handle)
def check_metadata(component_type_string, target_class=None, layer_name=None,
                   neuron_index_matrix=None, channel_indices=None):
    """Error-checks metadata for activation calculations.

    Only the arguments relevant to the given component type are checked.

    C = number of model components (classes, neurons, or channels) for which
        activations were computed

    :param component_type_string: Component type (must be accepted by
        `model_interpretation.check_component_type`).
    :param target_class: See doc for `get_class_activation_for_examples`.
    :param layer_name: See doc for `get_neuron_activation_for_examples` or
        `get_channel_activation_for_examples`.
    :param neuron_index_matrix: [used only if component_type_string = "neuron"]
        C-by-? numpy array, where neuron_index_matrix[j, :] contains array
        indices of the [j]th neuron whose activation was computed.
    :param channel_indices: [used only if component_type_string = "channel"]
        length-C numpy array, where channel_indices[j] is the index of the
        [j]th channel whose activation was computed.
    :return: num_components: Number of model components (classes, neurons, or
        channels) whose activation was computed.  This is 1 for the "class"
        component type.
    """

    model_interpretation.check_component_type(component_type_string)

    is_class_component = (
        component_type_string ==
        model_interpretation.CLASS_COMPONENT_TYPE_STRING
    )
    is_neuron_component = (
        component_type_string ==
        model_interpretation.NEURON_COMPONENT_TYPE_STRING
    )
    is_channel_component = (
        component_type_string ==
        model_interpretation.CHANNEL_COMPONENT_TYPE_STRING
    )

    if is_class_component:
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)
        num_components = 1

    if is_neuron_component or is_channel_component:
        error_checking.assert_is_string(layer_name)

    if is_neuron_component:
        error_checking.assert_is_integer_numpy_array(neuron_index_matrix)
        error_checking.assert_is_geq_numpy_array(neuron_index_matrix, 0)
        error_checking.assert_is_numpy_array(
            neuron_index_matrix, num_dimensions=2)

        # One row per neuron.
        num_components = neuron_index_matrix.shape[0]

    if is_channel_component:
        error_checking.assert_is_integer_numpy_array(channel_indices)
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)
        num_components = len(channel_indices)

    return num_components
def _get_random_sample_points(
        num_points, for_downsized_examples, narr_mask_matrix=None):
    """Samples random points from NARR grid.

    Points are drawn without replacement from the set of candidate grid cells.

    M = number of rows in NARR grid
    N = number of columns in NARR grid
    P = number of points sampled

    :param num_points: Number of points to sample.
    :param for_downsized_examples: Boolean flag.  If True, this method will
        sample center points for downsized images (from the grid whose
        dimensions are given by `nwp_model_utils.get_grid_dimensions` for the
        NARR model).  If False, will sample evaluation points from a full-size
        image (the subgrid delimited by the `*_FOR_FCN_INPUT` constants in
        `ml_utils`).
    :param narr_mask_matrix: M-by-N numpy array of integers (0 or 1).  If
        narr_mask_matrix[i, j] = 0, cell [i, j] in the full grid will never be
        sampled.  If `narr_mask_matrix is None`, any grid cell can be sampled.
        The mask is discarded when `for_downsized_examples` is False.
    :return: row_indices: length-P numpy array with row indices of sampled
        points.
    :return: column_indices: length-P numpy array with column indices of
        sampled points.
    """

    if for_downsized_examples:
        num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
    else:
        num_grid_rows = (
            ml_utils.LAST_NARR_ROW_FOR_FCN_INPUT -
            ml_utils.FIRST_NARR_ROW_FOR_FCN_INPUT + 1
        )
        num_grid_columns = (
            ml_utils.LAST_NARR_COLUMN_FOR_FCN_INPUT -
            ml_utils.FIRST_NARR_COLUMN_FOR_FCN_INPUT + 1
        )

        # NOTE(review): the mask is dropped for full-size sampling, presumably
        # because its dimensions describe the full grid rather than this
        # subgrid -- confirm that this assignment belongs in the `else` branch.
        narr_mask_matrix = None

    if narr_mask_matrix is None:
        # No mask: every cell of the grid is a candidate.
        num_grid_cells = num_grid_rows * num_grid_columns
        possible_linear_indices = numpy.linspace(
            0, num_grid_cells - 1, num=num_grid_cells, dtype=int)
    else:
        error_checking.assert_is_integer_numpy_array(narr_mask_matrix)
        error_checking.assert_is_geq_numpy_array(narr_mask_matrix, 0)
        error_checking.assert_is_leq_numpy_array(narr_mask_matrix, 1)
        error_checking.assert_is_numpy_array(
            narr_mask_matrix,
            exact_dimensions=numpy.array([num_grid_rows, num_grid_columns]))

        # Only unmasked cells (value of 1) are candidates.
        possible_linear_indices = numpy.where(
            numpy.ravel(narr_mask_matrix) == 1)[0]

    linear_indices = numpy.random.choice(
        possible_linear_indices, size=num_points, replace=False)

    # Convert linear indices back to (row, column) pairs.
    return numpy.unravel_index(
        linear_indices, (num_grid_rows, num_grid_columns))
def test_draw_sample_large(self):
    """Ensures correct output from draw_sample.

    In this case, number of examples to draw = number of original examples.
    """

    this_sample_vector, these_sample_indices = bootstrapping.draw_sample(
        INPUT_VECTOR, NUM_EXAMPLES_LARGE)

    # `assertEqual` reports both values on failure, unlike
    # `assertTrue(a == b)`.
    self.assertEqual(len(this_sample_vector), NUM_EXAMPLES_LARGE)
    self.assertEqual(len(these_sample_indices), NUM_EXAMPLES_LARGE)
    error_checking.assert_is_integer_numpy_array(these_sample_indices)
def get_events_in_months(desired_months, verbose, event_months=None,
                         event_times_unix_sec=None):
    """Finds the events that fall in the desired months.

    If `event_months is None`, the months are computed from
    `event_times_unix_sec`.

    :param desired_months: 1-D numpy array of desired months (range 1...12).
    :param verbose: Boolean flag.  If True, will print messages to command
        window.
    :param event_months: 1-D numpy array of event months (range 1...12).
    :param event_times_unix_sec: 1-D numpy array of event times.
    :return: desired_event_indices: 1-D numpy array with indices of events in
        desired months.
    :return: event_months: See input doc.
    """

    if event_months is None:
        error_checking.assert_is_numpy_array(
            event_times_unix_sec, num_dimensions=1)

        # "%m" yields the month as a number, one event at a time.
        month_list = [
            int(time_conversion.unix_sec_to_string(this_time_unix_sec, '%m'))
            for this_time_unix_sec in event_times_unix_sec
        ]
        event_months = numpy.array(month_list, dtype=int)

    error_checking.assert_is_integer_numpy_array(event_months)
    error_checking.assert_is_numpy_array(event_months, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(event_months, 1)
    error_checking.assert_is_leq_numpy_array(event_months, NUM_MONTHS_IN_YEAR)

    error_checking.assert_is_integer_numpy_array(desired_months)
    error_checking.assert_is_numpy_array(desired_months, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(desired_months, 1)
    error_checking.assert_is_leq_numpy_array(
        desired_months, NUM_MONTHS_IN_YEAR)

    error_checking.assert_is_boolean(verbose)

    in_desired_month_flags = numpy.array(
        [this_month in desired_months for this_month in event_months],
        dtype=bool
    )
    desired_event_indices = numpy.where(in_desired_month_flags)[0]

    if verbose:
        print('{0:d} of {1:d} events are in months {2:s}!'.format(
            len(desired_event_indices), len(event_months),
            str(desired_months)
        ))

    return desired_event_indices, event_months
def get_events_in_hours(desired_hours, verbose, event_hours=None,
                        event_times_unix_sec=None):
    """Finds the events that fall in the desired hours.

    If `event_hours is None`, the hours are computed from
    `event_times_unix_sec`.

    :param desired_hours: 1-D numpy array of desired hours (range 0...23).
    :param verbose: Boolean flag.  If True, will print messages to command
        window.
    :param event_hours: 1-D numpy array of event hours (range 0...23).
    :param event_times_unix_sec: 1-D numpy array of event times.
    :return: desired_event_indices: 1-D numpy array with indices of events in
        desired hours.
    :return: event_hours: See input doc.
    """

    if event_hours is None:
        error_checking.assert_is_numpy_array(
            event_times_unix_sec, num_dimensions=1)

        # "%H" yields the hour as a number, one event at a time.
        hour_list = [
            int(time_conversion.unix_sec_to_string(this_time_unix_sec, '%H'))
            for this_time_unix_sec in event_times_unix_sec
        ]
        event_hours = numpy.array(hour_list, dtype=int)

    error_checking.assert_is_integer_numpy_array(event_hours)
    error_checking.assert_is_numpy_array(event_hours, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(event_hours, 0)
    error_checking.assert_is_less_than_numpy_array(
        event_hours, NUM_HOURS_IN_DAY)

    error_checking.assert_is_integer_numpy_array(desired_hours)
    error_checking.assert_is_numpy_array(desired_hours, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(desired_hours, 0)
    error_checking.assert_is_less_than_numpy_array(
        desired_hours, NUM_HOURS_IN_DAY)

    error_checking.assert_is_boolean(verbose)

    in_desired_hour_flags = numpy.array(
        [this_hour in desired_hours for this_hour in event_hours],
        dtype=bool
    )
    desired_event_indices = numpy.where(in_desired_hour_flags)[0]

    if verbose:
        print('{0:d} of {1:d} events are in hours {2:s}!'.format(
            len(desired_event_indices), len(event_hours), str(desired_hours)
        ))

    return desired_event_indices, event_hours
def check_target_array(target_array, num_dimensions, num_classes):
    """Error-checks an array of target values.

    E = number of examples (storm objects)
    K = number of classes

    :param target_array: numpy array in one of two formats.
        [1] length-E integer numpy array of target values.  All values are -2
            ("dead storm") or 0...[K - 1], where K = number of classes.
        [2] E-by-K numpy array, where each value is 0 or 1.  If
            target_array[i, k] = 1, the [i]th storm object belongs to the
            [k]th class.  Classes are mutually exclusive and collectively
            exhaustive, so the sum across each row of the matrix is 1.
    :param num_dimensions: Number of dimensions expected in `target_array`
        (must be 1 or 2).
    :param num_classes: Number of classes that should be represented in
        `target_array` (must be >= 2).
    """

    error_checking.assert_is_integer(num_dimensions)
    error_checking.assert_is_geq(num_dimensions, 1)
    error_checking.assert_is_leq(num_dimensions, 2)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    example_count = target_array.shape[0]

    if num_dimensions != 1:
        # Format [2]: E-by-K matrix of zeros and ones.
        error_checking.assert_is_geq_numpy_array(target_array, 0)
        error_checking.assert_is_leq_numpy_array(target_array, 1)

        expected_dimensions = numpy.array(
            [example_count, num_classes], dtype=int)
        error_checking.assert_is_numpy_array(
            target_array, exact_dimensions=expected_dimensions)
        return

    # Format [1]: length-E vector of integer class labels.
    error_checking.assert_is_integer_numpy_array(target_array)

    expected_dimensions = numpy.array([example_count], dtype=int)
    error_checking.assert_is_numpy_array(
        target_array, exact_dimensions=expected_dimensions)

    # TODO(thunderhoser): This is a HACK.  Should do better input-checking.
    # Ideally the "dead storm" entries would be excluded and the remaining
    # values checked against a lower bound of 0.  For now the lower bound for
    # all values is the "dead storm" flag itself.
    error_checking.assert_is_geq_numpy_array(
        target_array, target_val_utils.DEAD_STORM_INTEGER)
    error_checking.assert_is_less_than_numpy_array(target_array, num_classes)
def grid_points_in_poly_to_binary_matrix(row_indices, column_indices):
    """Converts list of grid points in polygon to binary image.

    The binary image covers the bounding box of the input points, plus a
    one-pixel border of False values on every side (hence the "+ 3" below).

    P = number of grid points in polygon
    M = max(row_indices) - min(row_indices) + 3 = number of rows in binary
        image
    N = max(column_indices) - min(column_indices) + 3 = number of columns in
        binary image

    :param row_indices: length-P numpy array with row numbers (non-negative
        integers) of grid points in polygon.
    :param column_indices: length-P numpy array with column numbers
        (non-negative integers) of grid points in polygon.
    :return: binary_matrix: M-by-N numpy array of Boolean flags.  If
        binary_matrix[i, j] = True, pixel [i, j] is inside the polygon.
    :return: first_row_index: Used to convert row numbers from the binary
        image (which spans only a subgrid) to the full grid.  Row 0 in the
        subgrid = row `first_row_index` in the full grid.
    :return: first_column_index: Same as above, but for column numbers.
    """

    error_checking.assert_is_integer_numpy_array(row_indices)
    error_checking.assert_is_geq_numpy_array(row_indices, 0)
    error_checking.assert_is_numpy_array(row_indices, num_dimensions=1)
    num_points_in_polygon = len(row_indices)

    error_checking.assert_is_integer_numpy_array(column_indices)
    error_checking.assert_is_geq_numpy_array(column_indices, 0)
    error_checking.assert_is_numpy_array(
        column_indices,
        exact_dimensions=numpy.array([num_points_in_polygon]))

    # Use numpy reductions (rather than the builtin min/max, which iterate
    # over the array in Python) and compute each minimum only once.
    min_row_index = numpy.min(row_indices)
    min_column_index = numpy.min(column_indices)

    num_rows_in_subgrid = numpy.max(row_indices) - min_row_index + 3
    num_columns_in_subgrid = numpy.max(column_indices) - min_column_index + 3

    # The subgrid starts one pixel before the first polygon row/column, so
    # polygon points never land on the outer edge of the binary image.
    first_row_index = min_row_index - 1
    first_column_index = min_column_index - 1

    binary_matrix = numpy.full(
        (num_rows_in_subgrid, num_columns_in_subgrid), False, dtype=bool)
    binary_matrix[
        row_indices - first_row_index, column_indices - first_column_index
    ] = True

    return binary_matrix, first_row_index, first_column_index
def _plot_inset_histogram_for_attributes_diagram(figure_object,
                                                 num_examples_by_bin):
    """Plots forecast histogram inset in attributes diagram.

    For more on the attributes diagram, see Hsu and Murphy (1986).

    The histogram is drawn in a new set of axes added to `figure_object`.
    Forecast bins are assumed to be equally spaced over [0, 1].

    B = number of forecast bins

    :param figure_object: Instance of `matplotlib.figure.Figure`.
    :param num_examples_by_bin: length-B numpy array with number of examples
        in each forecast bin (non-negative integers; B must be at least 2).
    """

    error_checking.assert_is_integer_numpy_array(num_examples_by_bin)
    error_checking.assert_is_numpy_array(num_examples_by_bin, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(num_examples_by_bin, 0)

    num_forecast_bins = len(num_examples_by_bin)
    error_checking.assert_is_geq(num_forecast_bins, 2)

    # NOTE: if every bin is empty, the sum is zero and this division yields
    # NaN frequencies.
    example_frequency_by_bin = (
        num_examples_by_bin.astype(float) / numpy.sum(num_examples_by_bin)
    )

    forecast_bin_edges = numpy.linspace(0., 1., num=num_forecast_bins + 1)
    forecast_bin_width = forecast_bin_edges[1] - forecast_bin_edges[0]
    forecast_bin_centers = forecast_bin_edges[:-1] + forecast_bin_width / 2

    inset_axes_object = figure_object.add_axes([
        HISTOGRAM_LEFT_EDGE, HISTOGRAM_BOTTOM_EDGE,
        HISTOGRAM_AXES_WIDTH, HISTOGRAM_AXES_HEIGHT
    ])

    inset_axes_object.bar(
        forecast_bin_centers, example_frequency_by_bin, forecast_bin_width,
        color=plotting_utils.colour_from_numpy_to_tuple(BAR_FACE_COLOUR),
        edgecolor=plotting_utils.colour_from_numpy_to_tuple(BAR_EDGE_COLOUR),
        linewidth=BAR_EDGE_WIDTH)

    # Leave 5% of headroom above the tallest bar.
    y_axis_upper_limit = 1.05 * numpy.max(example_frequency_by_bin)

    max_y_tick_value = rounder.floor_to_nearest(
        y_axis_upper_limit, HISTOGRAM_Y_SPACING)
    num_y_ticks = 1 + int(numpy.round(max_y_tick_value / HISTOGRAM_Y_SPACING))
    y_tick_values = numpy.linspace(0., max_y_tick_value, num=num_y_ticks)

    # Set ticks on the inset axes explicitly.  `pyplot.xticks`/`pyplot.yticks`
    # act on pyplot's current axes, which is not guaranteed to be the inset
    # (e.g., when `figure_object` is not pyplot's current figure).
    inset_axes_object.set_xticks(HISTOGRAM_X_VALUES)
    inset_axes_object.set_yticks(y_tick_values)

    inset_axes_object.set_xlim(0., 1.)
    inset_axes_object.set_ylim(0., y_axis_upper_limit)
    inset_axes_object.set_title('Forecast histogram', fontsize=20)
def plot_narr_grid(frontal_grid_matrix, axes_object, basemap_object,
                   first_row_in_narr_grid=0, first_column_in_narr_grid=0,
                   opacity=DEFAULT_GRID_OPACITY):
    """Plots NARR grid points intersected by a warm front or cold front.

    This method plots data over a contiguous subset of the NARR grid, which
    need not be *strictly* a subset.  In other words, the "subset" could be the
    full NARR grid.

    Grid points equal to `front_utils.NO_FRONT_INTEGER_ID` are masked out
    before plotting.

    :param frontal_grid_matrix: See documentation for
        `front_utils.frontal_grid_to_points`.  Must be a 2-D integer numpy
        array with all values between the smallest and largest entries of
        `front_utils.VALID_INTEGER_IDS`.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap`.
    :param first_row_in_narr_grid: Row 0 in the subgrid is row
        `first_row_in_narr_grid` in the full NARR grid.
    :param first_column_in_narr_grid: Column 0 in the subgrid is column
        `first_column_in_narr_grid` in the full NARR grid.
    :param opacity: Opacity for colour map (in range 0...1).
    """

    error_checking.assert_is_integer_numpy_array(frontal_grid_matrix)
    error_checking.assert_is_numpy_array(frontal_grid_matrix, num_dimensions=2)

    error_checking.assert_is_geq_numpy_array(
        frontal_grid_matrix, numpy.min(front_utils.VALID_INTEGER_IDS)
    )
    error_checking.assert_is_leq_numpy_array(
        frontal_grid_matrix, numpy.max(front_utils.VALID_INTEGER_IDS)
    )

    colour_map_object, _, colour_bounds = get_colour_map_for_grid()

    # Hide grid points with no front, so that only frontal points are drawn.
    no_front_flag_matrix = (
        frontal_grid_matrix == front_utils.NO_FRONT_INTEGER_ID
    )
    masked_grid_matrix = numpy.ma.masked_where(
        no_front_flag_matrix, frontal_grid_matrix)

    # The colour limits are the second and second-last colour bounds.
    min_colour_value = colour_bounds[1]
    max_colour_value = colour_bounds[-2]

    narr_plotting.plot_xy_grid(
        data_matrix=masked_grid_matrix,
        axes_object=axes_object,
        basemap_object=basemap_object,
        colour_map=colour_map_object,
        colour_minimum=min_colour_value,
        colour_maximum=max_colour_value,
        first_row_in_narr_grid=first_row_in_narr_grid,
        first_column_in_narr_grid=first_column_in_narr_grid,
        opacity=opacity)
def plot_attributes_diagram(figure_object, axes_object, mean_forecast_by_bin,
                            event_frequency_by_bin, num_examples_by_bin):
    """Plots attributes diagram (Hsu and Murphy 1986).

    The diagram consists of three parts, drawn in this order: background
    (based on climatology), inset forecast histogram, and reliability curve.
    Climatology is the average of `event_frequency_by_bin` over non-empty
    bins, weighted by `num_examples_by_bin`.

    :param figure_object: Instance of `matplotlib.figure.Figure`.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param mean_forecast_by_bin: See doc for `plot_reliability_curve`.
    :param event_frequency_by_bin: Same.  May contain NaN only in bins with
        zero examples.
    :param num_examples_by_bin: See doc for
        `_plot_inset_histogram_for_attributes_diagram`.
    """

    error_checking.assert_is_numpy_array(
        event_frequency_by_bin, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(
        event_frequency_by_bin, 0., allow_nan=True)
    error_checking.assert_is_leq_numpy_array(
        event_frequency_by_bin, 1., allow_nan=True)

    num_bins = len(mean_forecast_by_bin)
    expected_dim = numpy.array([num_bins], dtype=int)

    # `event_frequency_by_bin` is indexed below with indices derived from
    # `num_examples_by_bin`, so all per-bin arrays must have the same length.
    # Checking here makes a mismatch fail before anything is drawn.
    error_checking.assert_is_numpy_array(
        event_frequency_by_bin, exact_dimensions=expected_dim)

    error_checking.assert_is_integer_numpy_array(num_examples_by_bin)
    error_checking.assert_is_numpy_array(
        num_examples_by_bin, exact_dimensions=expected_dim)
    error_checking.assert_is_geq_numpy_array(num_examples_by_bin, 0)

    # Event frequency is allowed to be NaN only where the bin is empty.
    non_empty_bin_indices = numpy.where(num_examples_by_bin > 0)[0]
    error_checking.assert_is_numpy_array_without_nan(
        event_frequency_by_bin[non_empty_bin_indices])

    climatology = numpy.average(
        event_frequency_by_bin[non_empty_bin_indices],
        weights=num_examples_by_bin[non_empty_bin_indices])

    _plot_background_of_attributes_diagram(
        axes_object=axes_object, climatology=climatology)

    _plot_inset_histogram_for_attributes_diagram(
        figure_object=figure_object, num_examples_by_bin=num_examples_by_bin)

    plot_reliability_curve(
        axes_object=axes_object, mean_forecast_by_bin=mean_forecast_by_bin,
        event_frequency_by_bin=event_frequency_by_bin)
def unzip_1day_tar_file(tar_file_name, spc_date_unix_sec=None,
                        top_target_directory_name=None,
                        scales_to_extract_metres2=None):
    """Unzips tar file with segmotion output for one SPC date.

    For each tracking scale, two directories are extracted: one with
    statistics and one with polygons.

    :param tar_file_name: Path to input file.
    :param spc_date_unix_sec: SPC date.
    :param top_target_directory_name: Name of top-level output directory.
    :param scales_to_extract_metres2: 1-D numpy array of tracking scales to
        extract (positive integers).
    :return: target_directory_name: Path to output directory.  This will be
        "<top_target_directory_name>/<yyyymmdd>", where <yyyymmdd> is the SPC
        date.
    """

    error_checking.assert_file_exists(tar_file_name)
    error_checking.assert_is_greater_numpy_array(scales_to_extract_metres2, 0)
    error_checking.assert_is_integer_numpy_array(scales_to_extract_metres2)
    error_checking.assert_is_numpy_array(
        scales_to_extract_metres2, num_dimensions=1)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)

    # The relative directory names have the SPC date (plus a slash) removed
    # before being passed to the unzipper.
    date_prefix = spc_date_string + '/'
    directory_names_to_unzip = []

    for this_scale_metres2 in scales_to_extract_metres2:
        stats_dir_name = _get_relative_stats_dir_physical_scale(
            spc_date_string, this_scale_metres2)
        polygon_dir_name = _get_relative_polygon_dir_physical_scale(
            spc_date_string, this_scale_metres2)

        directory_names_to_unzip.extend([
            stats_dir_name.replace(date_prefix, ''),
            polygon_dir_name.replace(date_prefix, ''),
        ])

    target_directory_name = '{0:s}/{1:s}'.format(
        top_target_directory_name, spc_date_string)

    unzipping.unzip_tar(
        tar_file_name,
        target_directory_name=target_directory_name,
        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name
def _check_contingency_table(contingency_table_as_matrix):
    """Checks contingency table for errors.

    The table must be a square (K-by-K) numpy array of non-negative integers.

    K = number of classes

    :param contingency_table_as_matrix: K-by-K numpy array.
        contingency_table_as_matrix[i, j] is the number of examples for which
        the predicted label is i and the true label is j.
    """

    error_checking.assert_is_numpy_array(
        contingency_table_as_matrix, num_dimensions=2)

    # The number of rows defines K; the table must then be exactly K-by-K.
    square_dimensions = numpy.array(
        [contingency_table_as_matrix.shape[0]] * 2)
    error_checking.assert_is_numpy_array(
        contingency_table_as_matrix, exact_dimensions=square_dimensions)

    error_checking.assert_is_integer_numpy_array(contingency_table_as_matrix)
    error_checking.assert_is_geq_numpy_array(contingency_table_as_matrix, 0)
def test_get_random_sample_points_full_size(self):
    """Ensures correct output from _get_random_sample_points.

    In this case, for_downsized_examples = False.  The test verifies only that
    the sampled row and column indices are integers within the bounds of the
    full-size grid.
    """

    sampled_rows, sampled_columns = (
        evaluation_utils._get_random_sample_points(
            num_points=NUM_POINTS_TO_SAMPLE, for_downsized_examples=False)
    )

    # Rows are checked first, then columns; each index array gets the same
    # three checks against its own exclusive upper bound.
    indices_and_upper_bounds = (
        (sampled_rows, NUM_ROWS_FOR_FCN_INPUT),
        (sampled_columns, NUM_COLUMNS_FOR_FCN_INPUT),
    )

    for these_indices, this_upper_bound in indices_and_upper_bounds:
        error_checking.assert_is_integer_numpy_array(these_indices)
        error_checking.assert_is_geq_numpy_array(these_indices, 0)
        error_checking.assert_is_less_than_numpy_array(
            these_indices, this_upper_bound)