def check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Validates predicted probabilities and observed labels.

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats.
        class_probability_matrix[i, k] is the predicted probability that the
        [i]th example belongs to the [k]th class.  All values must be in
        0...1.
    :param observed_labels: length-P numpy array of integers in
        0...[K - 1].  If observed_labels[i] = k, the [i]th example truly
        belongs to the [k]th class.
    """

    # Probabilities: 2-D array with every value in [0, 1].
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    # Safe to unpack: the array has just been confirmed to be 2-D.
    num_pairs, num_classes = class_probability_matrix.shape

    # Labels: one integer class index per evaluation pair.
    expected_dimensions = numpy.array([num_pairs])
    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_dimensions)
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes)
def extract_radar_grid_points(field_matrix, row_indices, column_indices):
    """Pulls values at the given grid points out of a radar field.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)
    P = number of points to extract

    :param field_matrix: M-by-N numpy array with values of a single radar
        field.
    :param row_indices: length-P numpy array with row indices of points to
        extract (integers in 0...[M - 1]).
    :param column_indices: length-P numpy array with column indices of points
        to extract (integers in 0...[N - 1]).
    :return: extracted_values: length-P numpy array of values extracted from
        field_matrix.
    """

    error_checking.assert_is_real_numpy_array(field_matrix)
    error_checking.assert_is_numpy_array(field_matrix, num_dimensions=2)

    num_rows, num_columns = field_matrix.shape

    # Rows are checked first, then columns; each index array must contain
    # integers that are valid positions along the corresponding axis.
    for these_indices, this_axis_length in (
            (row_indices, num_rows), (column_indices, num_columns)
    ):
        error_checking.assert_is_integer_numpy_array(these_indices)
        error_checking.assert_is_geq_numpy_array(these_indices, 0)
        error_checking.assert_is_less_than_numpy_array(
            these_indices, this_axis_length)

    return field_matrix[row_indices, column_indices]
def _check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Error-checks a set of evaluation pairs (forecast + observation).

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats in 0...1.
        class_probability_matrix[i, k] is the predicted probability that the
        [i]th example belongs to the [k]th class.
    :param observed_labels: length-P numpy array of integers in
        0...[K - 1].  If observed_labels[i] = k, the [i]th example truly
        belongs to the [k]th class.
    """

    # TODO(thunderhoser): This method is duplicated from evaluation_utils.py.
    # I can't just import evaluation_utils.py, because this leads to a
    # circular import chain.  The answer is to put this method somewhere more
    # general.

    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2
    )
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    matrix_dimensions = class_probability_matrix.shape
    num_classes = matrix_dimensions[1]

    # There must be exactly one observed label per row of the matrix.
    error_checking.assert_is_numpy_array(
        observed_labels,
        exact_dimensions=numpy.array([matrix_dimensions[0]])
    )
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes
    )
def test_assert_is_negative_numpy_array_true_with_nan_allowed(self):
    """Checks assert_is_less_than_numpy_array; base_value = 0, inputs < 0.

    Here the input array contains NaN's and allow_nan = True, so the call is
    expected to return without raising.
    """

    base_value = 0

    error_checking.assert_is_less_than_numpy_array(
        NEGATIVE_NUMPY_ARRAY_WITH_NANS, base_value, allow_nan=True
    )
def get_events_in_hours(desired_hours, verbose, event_hours=None,
                        event_times_unix_sec=None):
    """Finds events in desired hours.

    If `event_hours is None`, `event_times_unix_sec` will be used to compute
    the hour of each event.

    :param desired_hours: 1-D numpy array of desired hours (integers in range
        0...23).
    :param verbose: Boolean flag.  If True, will print messages to command
        window.
    :param event_hours: 1-D numpy array of event hours (integers in range
        0...23).
    :param event_times_unix_sec: 1-D numpy array of event times.  Used only
        if `event_hours is None`.
    :return: desired_event_indices: 1-D numpy array with indices of events in
        desired hours.
    :return: event_hours: 1-D numpy array of event hours (range 0...23).  This
        is the input array if one was given; otherwise it is the array
        computed from `event_times_unix_sec`.
    """

    if event_hours is None:
        error_checking.assert_is_numpy_array(
            event_times_unix_sec, num_dimensions=1)

        # Hour of day is taken from the '%H' string representation of each
        # time.
        event_hours = numpy.array([
            int(time_conversion.unix_sec_to_string(t, '%H'))
            for t in event_times_unix_sec
        ], dtype=int)

    error_checking.assert_is_integer_numpy_array(event_hours)
    error_checking.assert_is_numpy_array(event_hours, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(event_hours, 0)
    error_checking.assert_is_less_than_numpy_array(
        event_hours, NUM_HOURS_IN_DAY)

    error_checking.assert_is_integer_numpy_array(desired_hours)
    error_checking.assert_is_numpy_array(desired_hours, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(desired_hours, 0)
    error_checking.assert_is_less_than_numpy_array(
        desired_hours, NUM_HOURS_IN_DAY)

    error_checking.assert_is_boolean(verbose)

    # Vectorized membership test: one flag per event, True where the event's
    # hour appears in `desired_hours`.
    desired_event_flags = numpy.isin(event_hours, desired_hours)
    desired_event_indices = numpy.where(desired_event_flags)[0]

    if verbose:
        print('{0:d} of {1:d} events are in hours {2:s}!'.format(
            len(desired_event_indices), len(event_hours), str(desired_hours)
        ))

    return desired_event_indices, event_hours
def check_target_array(target_array, num_dimensions, num_classes):
    """Validates an array of target values.

    E = number of examples (storm objects)
    K = number of classes

    :param target_array: numpy array in one of two formats.
        [1] length-E integer numpy array of target values.  All values are -2
            ("dead storm") or 0...[K - 1].  Note that the lower bound actually
            enforced here is `target_val_utils.DEAD_STORM_INTEGER`, so any
            integer from that value through K - 1 passes (see TODO in code).
        [2] E-by-K numpy array, where each value is 0 or 1.  If
            target_array[i, k] = 1, the [i]th storm object belongs to the
            [k]th class.  Classes are mutually exclusive and collectively
            exhaustive, so the sum across each row of the matrix is 1.  Only
            the value range (0...1) and the dimensions are checked here.
    :param num_dimensions: Number of dimensions expected in `target_array`
        (1 or 2).
    :param num_classes: Number of classes that should be represented in
        `target_array` (at least 2).
    """

    error_checking.assert_is_integer(num_dimensions)
    error_checking.assert_is_geq(num_dimensions, 1)
    error_checking.assert_is_leq(num_dimensions, 2)

    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    num_examples = target_array.shape[0]

    if num_dimensions != 1:
        # Format [2]: E-by-K matrix with values in 0...1.
        error_checking.assert_is_geq_numpy_array(target_array, 0)
        error_checking.assert_is_leq_numpy_array(target_array, 1)
        error_checking.assert_is_numpy_array(
            target_array,
            exact_dimensions=numpy.array(
                [num_examples, num_classes], dtype=int)
        )
        return

    # Format [1]: length-E vector of integer labels.
    error_checking.assert_is_integer_numpy_array(target_array)
    error_checking.assert_is_numpy_array(
        target_array,
        exact_dimensions=numpy.array([num_examples], dtype=int)
    )

    # TODO(thunderhoser): This is a HACK.  Should do better input-checking.

    # live_storm_object_indices = numpy.where(
    #     target_array != target_val_utils.DEAD_STORM_INTEGER
    # )[0]
    # error_checking.assert_is_geq_numpy_array(
    #     target_array[live_storm_object_indices], 0
    # )

    error_checking.assert_is_geq_numpy_array(
        target_array, target_val_utils.DEAD_STORM_INTEGER)
    error_checking.assert_is_less_than_numpy_array(target_array, num_classes)
def test_get_random_sample_points_full_size(self):
    """Ensures correct output from _get_random_sample_points.

    In this case, for_downsized_examples = False.  Row indices must be
    integers in 0...[NUM_ROWS_FOR_FCN_INPUT - 1] and column indices must be
    integers in 0...[NUM_COLUMNS_FOR_FCN_INPUT - 1].
    """

    sampled_rows, sampled_columns = (
        evaluation_utils._get_random_sample_points(
            num_points=NUM_POINTS_TO_SAMPLE, for_downsized_examples=False)
    )

    for these_indices, this_upper_limit in (
            (sampled_rows, NUM_ROWS_FOR_FCN_INPUT),
            (sampled_columns, NUM_COLUMNS_FOR_FCN_INPUT)
    ):
        error_checking.assert_is_integer_numpy_array(these_indices)
        error_checking.assert_is_geq_numpy_array(these_indices, 0)
        error_checking.assert_is_less_than_numpy_array(
            these_indices, this_upper_limit)
def _check_polygons(polygon_objects_grid_coords, num_panel_rows,
                    num_panel_columns, panel_row_by_polygon,
                    panel_column_by_polygon):
    """Validates a list of polygons and their panel assignments.

    If the list of polygons is empty, the panel-row and panel-column arrays
    are not checked.

    :param polygon_objects_grid_coords: See doc for
        `polygons_from_pixel_to_grid_coords`.
    :param num_panel_rows: Same.
    :param num_panel_columns: Same.
    :param panel_row_by_polygon: Same.
    :param panel_column_by_polygon: Same.
    """

    # Both panel counts must be positive integers.
    for this_panel_count in (num_panel_rows, num_panel_columns):
        error_checking.assert_is_integer(this_panel_count)
        error_checking.assert_is_greater(this_panel_count, 0)

    num_polygons = len(polygon_objects_grid_coords)
    if not num_polygons:
        return

    polygon_object_array = numpy.array(
        polygon_objects_grid_coords, dtype=object)
    error_checking.assert_is_numpy_array(
        polygon_object_array, num_dimensions=1)

    expected_dimensions = numpy.array([num_polygons], dtype=int)

    # One panel row and one panel column per polygon, each a valid index.
    for these_panel_indices, this_panel_count in (
            (panel_row_by_polygon, num_panel_rows),
            (panel_column_by_polygon, num_panel_columns)
    ):
        error_checking.assert_is_integer_numpy_array(these_panel_indices)
        error_checking.assert_is_numpy_array(
            these_panel_indices, exact_dimensions=expected_dimensions)
        error_checking.assert_is_geq_numpy_array(these_panel_indices, 0)
        error_checking.assert_is_less_than_numpy_array(
            these_panel_indices, this_panel_count)
def get_contingency_table(predicted_labels, observed_labels, num_classes):
    """Creates either binary or multi-class contingency table.

    P = number of evaluation pairs
    K = number of classes

    :param predicted_labels: length-P numpy array of predicted class labels
        (integers in 0...[K - 1]).
    :param observed_labels: length-P numpy array of true class labels
        (integers in 0...[K - 1]).
    :param num_classes: Number of classes (K).  Must be an integer >= 2, so
        that both binary (K = 2) and multi-class (K > 2) tables are allowed.
    :return: contingency_table_as_matrix: K-by-K numpy array of integers.
        contingency_table_as_matrix[i, j] is the number of examples for which
        the predicted label is i and the true label is j.
    """

    error_checking.assert_is_integer(num_classes)

    # The minimum is inclusive: a binary problem has exactly 2 classes and
    # must not be rejected.
    error_checking.assert_is_geq(num_classes, 2)

    error_checking.assert_is_numpy_array(predicted_labels, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(predicted_labels)
    error_checking.assert_is_geq_numpy_array(predicted_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        predicted_labels, num_classes)

    # Observed labels must pair one-to-one with predicted labels.
    num_evaluation_pairs = len(predicted_labels)

    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=numpy.array([num_evaluation_pairs]))
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes)

    # The -1 fill value is a placeholder; every cell is overwritten below.
    contingency_table_as_matrix = numpy.full(
        (num_classes, num_classes), -1, dtype=int)

    for i in range(num_classes):
        for j in range(num_classes):
            contingency_table_as_matrix[i, j] = numpy.sum(
                numpy.logical_and(predicted_labels == i, observed_labels == j)
            )

    return contingency_table_as_matrix
def test_get_random_sample_points_downsized_no_mask(self):
    """Ensures correct output from _get_random_sample_points.

    In this case, `for_downsized_examples = True and narr_mask_matrix is
    None`.  Row and column indices must be integers that are valid positions
    along the first and second axes of NARR_MASK_MATRIX, respectively.
    """

    sampled_rows, sampled_columns = (
        evaluation_utils._get_random_sample_points(
            num_points=NUM_POINTS_TO_SAMPLE, for_downsized_examples=True,
            narr_mask_matrix=None)
    )

    error_checking.assert_is_integer_numpy_array(sampled_rows)
    error_checking.assert_is_geq_numpy_array(sampled_rows, 0)
    num_mask_rows = NARR_MASK_MATRIX.shape[0]
    error_checking.assert_is_less_than_numpy_array(
        sampled_rows, num_mask_rows)

    error_checking.assert_is_integer_numpy_array(sampled_columns)
    error_checking.assert_is_geq_numpy_array(sampled_columns, 0)
    num_mask_columns = NARR_MASK_MATRIX.shape[1]
    error_checking.assert_is_less_than_numpy_array(
        sampled_columns, num_mask_columns)
def subset_by_index(example_dict, desired_indices):
    """Keeps only the examples at the given indices.

    The input dictionary is modified in place and also returned.

    :param example_dict: See doc for `example_io.read_file`.
    :param desired_indices: 1-D numpy array of desired indices (integers from
        0 to one less than the number of valid times in `example_dict`).
    :return: example_dict: Same as input but with fewer examples.
    """

    num_examples = len(example_dict[VALID_TIMES_KEY])

    error_checking.assert_is_numpy_array(desired_indices, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(desired_indices)
    error_checking.assert_is_geq_numpy_array(desired_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        desired_indices, num_examples)

    for this_key in ONE_PER_EXAMPLE_KEYS:
        these_values = example_dict[this_key]

        if not isinstance(these_values, list):
            # Array-like value: subset along the first axis.
            example_dict[this_key] = these_values[desired_indices, ...]
            continue

        # Lists do not support array indexing, so pick items one by one.
        example_dict[this_key] = [these_values[k] for k in desired_indices]

    return example_dict
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Plots data augmentation.

    This is effectively the main method.  It reads examples from one file,
    keeps those at `example_indices`, normalizes the radar images, runs them
    through `trainval_io._augment_radar_images`, denormalizes the result and
    calls `_plot_one_example` once per selected example.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.  Non-positive values are replaced with None.
    :param num_radar_columns: Same.  Non-positive values are replaced with
        None.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid sizes are passed to the reader as None.
    # NOTE(review): presumably None means "keep all rows/columns" -- confirm
    # against `input_examples.read_example_file`.
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name, read_all_target_vars=True,
        include_soundings=False, num_rows_to_keep=num_radar_rows,
        num_columns_to_keep=num_radar_columns,
        radar_field_names_to_keep=[RADAR_FIELD_NAME],
        radar_heights_to_keep_m_agl=numpy.array([RADAR_HEIGHT_M_AGL], dtype=int))

    # Use the reflectivity matrix if the dictionary has one; otherwise fall
    # back to the generic radar-image matrix.
    if input_examples.REFL_IMAGE_MATRIX_KEY in example_dict:
        radar_matrix = example_dict[input_examples.REFL_IMAGE_MATRIX_KEY]
    else:
        radar_matrix = example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY]

    # Requested indices must be valid positions along the example axis.
    num_examples_total = radar_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(example_indices,
                                                   num_examples_total)

    # Subset radar images, storm IDs and storm times to the selected examples.
    radar_matrix = radar_matrix[example_indices, ...]
    full_storm_id_strings = [
        example_dict[input_examples.FULL_IDS_KEY][k] for k in example_indices
    ]
    storm_times_unix_sec = example_dict[
        input_examples.STORM_TIMES_KEY][example_indices]

    radar_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=radar_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # The augmentation routine takes a target array; all-zero dummy values
    # are supplied and the returned targets are discarded.
    num_examples = radar_matrix.shape[0]
    dummy_target_values = numpy.full(num_examples, 0, dtype=int)

    # [0][0] selects the first element of the first returned item.
    # NOTE(review): presumably that is the (single) augmented predictor
    # matrix -- confirm against `trainval_io._augment_radar_images`.
    radar_matrix = trainval_io._augment_radar_images(
        list_of_predictor_matrices=[radar_matrix],
        target_array=dummy_target_values,
        x_translations_pixels=X_TRANSLATIONS_PX,
        y_translations_pixels=Y_TRANSLATIONS_PX,
        ccw_rotation_angles_deg=CCW_ROTATION_ANGLES_DEG,
        noise_standard_deviation=NOISE_STANDARD_DEVIATION, num_noisings=1,
        flip_in_x=False, flip_in_y=False)[0][0]

    radar_matrix = dl_utils.denormalize_radar_images(
        radar_image_matrix=radar_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Peel consecutive chunks of `num_examples` off the front of the
    # augmented matrix; whatever remains after three chunks is treated as the
    # noised images.
    # NOTE(review): this assumes the output is stacked along the first axis
    # as [original, translated, rotated, noised], with exactly one
    # translation and one rotation per example -- confirm against the
    # augmentation routine and the module-level constants.
    orig_radar_matrix = radar_matrix[:num_examples, ...]
    radar_matrix = radar_matrix[num_examples:, ...]
    translated_radar_matrix = radar_matrix[:num_examples, ...]
    radar_matrix = radar_matrix[num_examples:, ...]
    rotated_radar_matrix = radar_matrix[:num_examples, ...]
    noised_radar_matrix = radar_matrix[num_examples:, ...]

    for i in range(num_examples):
        _plot_one_example(orig_radar_matrix=orig_radar_matrix[i, ...],
                          translated_radar_matrix=translated_radar_matrix[i, ...],
                          rotated_radar_matrix=rotated_radar_matrix[i, ...],
                          noised_radar_matrix=noised_radar_matrix[i, ...],
                          output_dir_name=output_dir_name,
                          full_storm_id_string=full_storm_id_strings[i],
                          storm_time_unix_sec=storm_times_unix_sec[i])
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Makes figure to explain one convolution block.

    This is effectively the main method.  It reads examples from one file,
    normalizes one radar field, then applies convolution, activation, batch
    normalization and max-pooling to ALL examples in the file.  Only the
    examples at `example_indices` are plotted, one output file per example.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.  Non-positive values are replaced with None.
    :param num_radar_columns: Same.  Non-positive values are replaced with
        None.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid sizes are passed to the reader as None.
    # NOTE(review): presumably None means "keep all rows/columns" -- confirm
    # against `input_examples.read_example_file`.
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name, read_all_target_vars=False,
        target_name=DUMMY_TARGET_NAME, include_soundings=False,
        num_rows_to_keep=num_radar_rows,
        num_columns_to_keep=num_radar_columns,
        radar_heights_to_keep_m_agl=numpy.array([RADAR_HEIGHT_M_AGL], dtype=int)
    )

    # Use the reflectivity matrix if the dictionary has one; otherwise pick
    # the desired field out of the generic radar-image matrix.  Indexing with
    # a one-element list keeps the last axis (length 1).
    if input_examples.REFL_IMAGE_MATRIX_KEY in example_dict:
        input_feature_matrix = example_dict[
            input_examples.REFL_IMAGE_MATRIX_KEY]
    else:
        field_index = example_dict[input_examples.RADAR_FIELDS_KEY].index(
            RADAR_FIELD_NAME
        )
        input_feature_matrix = example_dict[
            input_examples.RADAR_IMAGE_MATRIX_KEY
        ][..., [field_index]]

    # Requested indices must be valid positions along the example axis.
    num_examples = input_feature_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples)

    input_feature_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=input_feature_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Drop the trailing axis (4-D input) or the two trailing axes (otherwise)
    # by taking index 0, then add one trailing axis of length 1 back.
    # NOTE(review): presumably the trailing axes are field (and height) --
    # confirm against `input_examples.read_example_file`.
    if len(input_feature_matrix.shape) == 4:
        input_feature_matrix = input_feature_matrix[..., 0]
    else:
        input_feature_matrix = input_feature_matrix[..., 0, 0]

    input_feature_matrix = numpy.expand_dims(input_feature_matrix, axis=-1)

    print('Doing convolution for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_conv = None

    for i in range(num_examples):
        # `+ 0` produces a new array rather than a view of the input.
        # NOTE(review): presumably this guards against in-place modification
        # by the helper; the [0, ...] presumably strips a leading example
        # axis of length 1 from the helper's output -- confirm both.
        this_feature_matrix = standalone_utils.do_2d_convolution(
            feature_matrix=input_feature_matrix[i, ...] + 0,
            kernel_matrix=KERNEL_MATRIX, pad_edges=False, stride_length_px=1
        )[0, ...]

        # Allocate the output lazily, because the per-example output shape is
        # known only after the first convolution.
        if feature_matrix_after_conv is None:
            feature_matrix_after_conv = numpy.full(
                (num_examples,) + this_feature_matrix.shape, numpy.nan
            )

        feature_matrix_after_conv[i, ...] = this_feature_matrix

    print('Doing activation for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_activn = standalone_utils.do_activation(
        input_values=feature_matrix_after_conv + 0,
        function_name=architecture_utils.RELU_FUNCTION_STRING, alpha=0.2)

    # Batch normalization receives the full set of examples, not only those
    # at `example_indices`.
    print('Doing batch norm for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_bn = standalone_utils.do_batch_normalization(
        feature_matrix=feature_matrix_after_activn + 0
    )

    print('Doing max-pooling for all {0:d} examples...\n'.format(num_examples))
    feature_matrix_after_pooling = None

    for i in range(num_examples):
        this_feature_matrix = standalone_utils.do_2d_pooling(
            feature_matrix=feature_matrix_after_bn[i, ...], stride_length_px=2,
            pooling_type_string=standalone_utils.MAX_POOLING_TYPE_STRING
        )[0, ...]

        # Same lazy allocation as for the convolution output.
        if feature_matrix_after_pooling is None:
            feature_matrix_after_pooling = numpy.full(
                (num_examples,) + this_feature_matrix.shape, numpy.nan
            )

        feature_matrix_after_pooling[i, ...] = this_feature_matrix

    # Plot only the requested examples.  The index within the file is used in
    # the output file name.
    for i in example_indices:
        this_output_file_name = '{0:s}/convolution_block{1:06d}.jpg'.format(
            output_dir_name, i)

        _plot_one_example(
            input_feature_matrix=input_feature_matrix[i, ...],
            feature_matrix_after_conv=feature_matrix_after_conv[i, ...],
            feature_matrix_after_activn=feature_matrix_after_activn[i, ...],
            feature_matrix_after_bn=feature_matrix_after_bn[i, ...],
            feature_matrix_after_pooling=feature_matrix_after_pooling[i, ...],
            output_file_name=this_output_file_name)
def fit_mvn_for_each_class(feature_table, class_labels, num_classes,
                           assume_diagonal_covar_matrix=False):
    """Fits one multivariate normal distribution per class.

    N = number of examples
    M = number of features (input variables)
    K = number of classes

    :param feature_table: pandas DataFrame with N rows and M columns.  Column
        names are feature names.
    :param class_labels: length-N numpy array of class labels.  Should be
        integers ranging from 0...[num_classes - 1].
    :param num_classes: Number of classes (at least 2).
    :param assume_diagonal_covar_matrix: See documentation for
        fit_multivariate_normal.
    :return: list_of_mvn_dictionaries: length-K list of dictionaries, where
        the item at list position k describes the examples with class label
        k.  Each dictionary is the output of `fit_multivariate_normal` for
        those examples, with the following keys added.
    list_of_mvn_dictionaries[k]['prior_class_probability']: Prior probability
        of the class.  This is the fraction of examples in `class_labels`
        with label k.
    list_of_mvn_dictionaries[k]['orig_feature_table']: Original feature table
        (rows of `feature_table` with label k).
    See `fit_multivariate_normal` for the remaining keys (feature names,
    feature means, covariance matrix and its inverse and determinant).
    :raises: ValueError: if any class is not represented in `class_labels`.
    """

    num_examples = len(feature_table.index)

    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    error_checking.assert_is_integer_numpy_array(class_labels)
    expected_dimensions = numpy.array([num_examples])
    error_checking.assert_is_numpy_array(
        class_labels, exact_dimensions=expected_dimensions)
    error_checking.assert_is_geq_numpy_array(class_labels, 0)
    error_checking.assert_is_less_than_numpy_array(class_labels, num_classes)

    list_of_mvn_dictionaries = []

    for k in range(num_classes):
        these_indices = numpy.where(class_labels == k)[0]

        # Every class needs at least one example to fit a distribution.
        if len(these_indices) == 0:
            error_string = (
                'Class {0:d} (label {1:d}) does not exist in the input data.'
            ).format(k + 1, k)

            raise ValueError(error_string)

        this_dict = fit_multivariate_normal(
            feature_table.iloc[these_indices],
            assume_diagonal_covar_matrix=assume_diagonal_covar_matrix)

        this_prior_probability = float(len(these_indices)) / num_examples
        this_dict.update({PRIOR_CLASS_PROBABILITY_KEY: this_prior_probability})

        # The table is subset again (rather than reusing the object passed to
        # the fitting routine) so that the stored table is a separate object.
        this_dict.update(
            {ORIG_FEATURE_TABLE_KEY: feature_table.iloc[these_indices]})

        list_of_mvn_dictionaries.append(this_dict)

    return list_of_mvn_dictionaries
def test_assert_is_negative_numpy_array_true(self):
    """Checks assert_is_less_than_numpy_array; base_value = 0, inputs < 0.

    The call is expected to return without raising.
    """

    base_value = 0

    error_checking.assert_is_less_than_numpy_array(
        NEGATIVE_NUMPY_ARRAY, base_value
    )
def test_assert_is_negative_numpy_array_non_positive(self):
    """Checks assert_is_less_than_numpy_array; base_value = 0, inputs <= 0.

    The call is expected to raise ValueError.
    """

    self.assertRaises(
        ValueError, error_checking.assert_is_less_than_numpy_array,
        NON_POSITIVE_NUMPY_ARRAY, 0
    )
def test_assert_is_negative_numpy_array_mixed_sign(self):
    """Checks assert_is_less_than_numpy_array; base_value = 0, inputs mixed.

    The call is expected to raise ValueError.
    """

    self.assertRaises(
        ValueError, error_checking.assert_is_less_than_numpy_array,
        MIXED_SIGN_NUMPY_ARRAY, 0
    )