def _check_input_data_for_learning(
        input_table, feature_names, target_name=None):
    """Validates the table fed to a machine-learning model.

    :param input_table: pandas DataFrame, where each row is one example (data
        point).
    :param feature_names: 1-D list with names of features (predictor
        variables).  Each one must be a column of `input_table`.
    :param target_name: Name of target variable (predictand).  If None, only
        the feature columns are checked.  Otherwise this must be a column of
        `input_table` whose values are all integers in {0, 1}.
    """

    error_checking.assert_is_string_list(feature_names)
    error_checking.assert_is_numpy_array(
        numpy.array(feature_names), num_dimensions=1)

    has_target = target_name is not None

    if has_target:
        error_checking.assert_is_string(target_name)
        required_column_names = feature_names + [target_name]
    else:
        required_column_names = feature_names

    error_checking.assert_columns_in_dataframe(
        input_table, required_column_names)

    if not has_target:
        return

    # Target must be binary: integers bounded by 0 and 1.
    target_values = input_table[target_name].values
    error_checking.assert_is_integer_numpy_array(target_values)
    error_checking.assert_is_geq_numpy_array(target_values, 0)
    error_checking.assert_is_leq_numpy_array(target_values, 1)
def displacements_and_bearings_to_xy_components(scalar_displacements_metres,
                                                geodetic_bearings_deg):
    """Splits each (total displacement, bearing) pair into x- and y-parts.

    P = number of points

    :param scalar_displacements_metres: length-P numpy array of total
        displacements (must be non-negative).
    :param geodetic_bearings_deg: length-P numpy array of geodetic bearings
        (measured clockwise from due north; must be in 0...360).
    :return: x_displacements_metres: length-P numpy array of eastward
        displacements.
    :return: y_displacements_metres: length-P numpy array of northward
        displacements.
    """

    error_checking.assert_is_geq_numpy_array(scalar_displacements_metres, 0.)
    error_checking.assert_is_numpy_array(
        scalar_displacements_metres, num_dimensions=1)

    # Bearings must have the same length as the displacements.
    expected_dimensions = numpy.array([len(scalar_displacements_metres)])

    error_checking.assert_is_geq_numpy_array(geodetic_bearings_deg, 0.)
    error_checking.assert_is_leq_numpy_array(geodetic_bearings_deg, 360.)
    error_checking.assert_is_numpy_array(
        geodetic_bearings_deg, exact_dimensions=expected_dimensions)

    standard_angles_deg = geodetic_to_standard_angles(geodetic_bearings_deg)
    standard_angles_radians = DEGREES_TO_RADIANS * standard_angles_deg

    x_displacements_metres = (
        scalar_displacements_metres * numpy.cos(standard_angles_radians)
    )
    y_displacements_metres = (
        scalar_displacements_metres * numpy.sin(standard_angles_radians)
    )

    return x_displacements_metres, y_displacements_metres
def test_get_translations(self):
    """Ensures correct output from get_translations.

    Checks that the requested number of x- and y-offsets is returned, that
    every offset lies within +/- `MAX_TRANSLATION_PIXELS`, and that no
    translation is the null translation (zero offset in both directions).
    """

    these_x_offsets_pixels, these_y_offsets_pixels = (
        data_augmentation.get_translations(
            num_translations=NUM_TRANSLATIONS,
            max_translation_pixels=MAX_TRANSLATION_PIXELS,
            num_grid_rows=2 * MAX_TRANSLATION_PIXELS,
            num_grid_columns=2 * MAX_TRANSLATION_PIXELS)
    )

    # assertEqual (rather than assertTrue on an `==` expression) reports both
    # values when the test fails.
    self.assertEqual(len(these_x_offsets_pixels), NUM_TRANSLATIONS)
    error_checking.assert_is_geq_numpy_array(
        these_x_offsets_pixels, -MAX_TRANSLATION_PIXELS)
    error_checking.assert_is_leq_numpy_array(
        these_x_offsets_pixels, MAX_TRANSLATION_PIXELS)

    self.assertEqual(len(these_y_offsets_pixels), NUM_TRANSLATIONS)
    error_checking.assert_is_geq_numpy_array(
        these_y_offsets_pixels, -MAX_TRANSLATION_PIXELS)
    error_checking.assert_is_leq_numpy_array(
        these_y_offsets_pixels, MAX_TRANSLATION_PIXELS)

    # Each translation must move the image in at least one direction.
    error_checking.assert_is_greater_numpy_array(
        numpy.absolute(these_x_offsets_pixels) +
        numpy.absolute(these_y_offsets_pixels),
        0)
def _check_frontal_image(image_matrix, assert_binary=False):
    """Error-checks a frontal image.

    M = number of grid rows (unique y-coordinates at grid points)
    N = number of grid columns (unique x-coordinates at grid points)

    :param image_matrix: M-by-N numpy array of integers.  May be either binary
        (2-class) or ternary (3-class).  If binary, all elements must be in
        {0, 1} and element [i, j] indicates whether or not a front intersects
        grid cell [i, j].  If ternary, elements must be in `VALID_INTEGER_IDS`
        and element [i, j] indicates the type of front (warm, cold, or none)
        intersecting grid cell [i, j].
    :param assert_binary: Boolean flag.  If True, the upper bound enforced is
        `ANY_FRONT_INTEGER_ID`, so a non-binary image causes an error.
    """

    error_checking.assert_is_numpy_array(image_matrix, num_dimensions=2)
    error_checking.assert_is_integer_numpy_array(image_matrix)

    min_allowed_id = numpy.min(VALID_INTEGER_IDS)
    error_checking.assert_is_geq_numpy_array(image_matrix, min_allowed_id)

    # Only the upper bound depends on whether the image must be binary.
    if assert_binary:
        max_allowed_id = ANY_FRONT_INTEGER_ID
    else:
        max_allowed_id = numpy.max(VALID_INTEGER_IDS)

    error_checking.assert_is_leq_numpy_array(image_matrix, max_allowed_id)
def scalar_displacements_and_bearings_to_xy(scalar_displacements_metres,
                                            geodetic_bearings_deg):
    """Converts displacement vectors from magnitude/direction to x-y form.

    :param scalar_displacements_metres: numpy array of total displacements
        (must be non-negative).
    :param geodetic_bearings_deg: equivalent-size numpy array of geodetic
        bearings (from start point to end point, measured clockwise from due
        north; must be in 0...360).
    :return: x_displacements_metres: equivalent-size numpy array of eastward
        displacements.
    :return: y_displacements_metres: equivalent-size numpy array of northward
        displacements.
    """

    error_checking.assert_is_geq_numpy_array(scalar_displacements_metres, 0.)
    error_checking.assert_is_geq_numpy_array(geodetic_bearings_deg, 0.)
    error_checking.assert_is_leq_numpy_array(geodetic_bearings_deg, 360.)

    # The two inputs must have exactly the same shape (any dimensionality).
    expected_dimensions = numpy.array(scalar_displacements_metres.shape)
    error_checking.assert_is_numpy_array(
        geodetic_bearings_deg, exact_dimensions=expected_dimensions)

    standard_angles_deg = geodetic_to_standard_angles(geodetic_bearings_deg)
    standard_angles_radians = DEGREES_TO_RADIANS * standard_angles_deg

    x_displacements_metres = (
        scalar_displacements_metres * numpy.cos(standard_angles_radians)
    )
    y_displacements_metres = (
        scalar_displacements_metres * numpy.sin(standard_angles_radians)
    )

    return x_displacements_metres, y_displacements_metres
def check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Error-checks evaluation pairs (predicted probabilities + true labels).

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats.
        class_probability_matrix[i, k] is the predicted probability that the
        [i]th example belongs to the [k]th class.  All values must be in 0...1.
    :param observed_labels: length-P numpy array of integers.  If
        observed_labels[i] = k, the [i]th example truly belongs to the [k]th
        class.  All values must be in 0...(K - 1).
    """

    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    num_evaluation_pairs, num_classes = class_probability_matrix.shape
    expected_dimensions = numpy.array([num_evaluation_pairs])

    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_dimensions)
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes)
def determinize_probabilities(class_probability_matrix,
                              binarization_threshold):
    """Determinizes probabilistic predictions.

    For each example, the predicted label is 0 if the probability of class 0
    is >= `binarization_threshold`.  Otherwise the predicted label is the
    most likely of the remaining classes (1 + argmax over columns 1...).

    P = number of evaluation pairs

    :param class_probability_matrix: See documentation for
        `check_evaluation_pairs`.
    :param binarization_threshold: See documentation for
        `find_best_binarization_threshold`.  Must be in 0...1.01.
    :return: predicted_labels: length-P numpy array of predicted class labels
        (integers).
    """

    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)
    error_checking.assert_is_geq(binarization_threshold, 0.)
    error_checking.assert_is_leq(binarization_threshold, 1.01)

    num_evaluation_pairs = class_probability_matrix.shape[0]

    # Start with every example in class 0, then overwrite the examples whose
    # class-0 probability falls short of the threshold.
    predicted_labels = numpy.full(num_evaluation_pairs, 0, dtype=int)

    first_class_flags = (
        class_probability_matrix[:, 0] >= binarization_threshold
    )
    other_class_flags = ~first_class_flags
    other_class_prob_matrix = class_probability_matrix[other_class_flags, 1:]

    # Skip argmax when no example needs it.  This keeps the behaviour of the
    # row-by-row version for a one-column matrix, where argmax over the (empty)
    # remaining columns raises an error only if some example is below the
    # threshold.
    if other_class_prob_matrix.shape[0] > 0:
        predicted_labels[other_class_flags] = 1 + numpy.argmax(
            other_class_prob_matrix, axis=1)

    return predicted_labels
def _check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Error-checks evaluation pairs (predicted probabilities + true labels).

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats.
        class_probability_matrix[i, k] is the predicted probability that the
        [i]th example belongs to the [k]th class.  All values must be in 0...1.
    :param observed_labels: length-P numpy array of integers.  If
        observed_labels[i] = k, the [i]th example truly belongs to the [k]th
        class.  All values must be in 0...(K - 1).
    """

    # TODO(thunderhoser): This method is duplicated from evaluation_utils.py.
    # I can't just import evaluation_utils.py, because this leads to a circular
    # import chain.  The answer is to put this method somewhere more general.

    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    num_evaluation_pairs, num_classes = class_probability_matrix.shape
    expected_dimensions = numpy.array([num_evaluation_pairs])

    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_dimensions)
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes)
def _check_statistic_params(statistic_names, percentile_levels):
    """Validates the parameters that define which statistics to compute.

    :param statistic_names: 1-D list with names of non-percentile-based
        statistics.
    :param percentile_levels: 1-D numpy array of percentile levels (must be in
        0...100).
    :return: percentile_levels: Same as input, but rounded with precision
        `PERCENTILE_LEVEL_PRECISION` and reduced to unique (sorted) values.
    :raises: ValueError: if any element of `statistic_names` is not in
        `STATISTIC_NAMES`.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(
        numpy.array(statistic_names), num_dimensions=1)

    error_checking.assert_is_numpy_array(percentile_levels, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(percentile_levels, 0.)
    error_checking.assert_is_leq_numpy_array(percentile_levels, 100.)

    invalid_names = [
        n for n in statistic_names if n not in STATISTIC_NAMES
    ]

    if invalid_names:
        # Report only the first offending name.
        error_string = ''.join([
            '\n\n', str(STATISTIC_NAMES), '\n\nValid statistic names ',
            '(listed above) do not include the following: "',
            invalid_names[0], '"'
        ])
        raise ValueError(error_string)

    rounded_levels = rounder.round_to_nearest(
        percentile_levels, PERCENTILE_LEVEL_PRECISION)
    return numpy.unique(rounded_levels)
def test_assert_is_non_positive_numpy_array_negative_with_nan_allowed(self):
    """Tests assert_is_leq_numpy_array with base value 0 and inputs < 0.

    Here the input array contains NaN's and `allow_nan = True`, so the check
    is expected to finish without raising.
    """

    base_value = 0
    error_checking.assert_is_leq_numpy_array(
        NEGATIVE_NUMPY_ARRAY_WITH_NANS, base_value, allow_nan=True)
def start_points_and_displacements_to_endpoints(
        start_latitudes_deg, start_longitudes_deg,
        scalar_displacements_metres, geodetic_bearings_deg):
    """Finds the endpoint reached from each start point and displacement.

    :param start_latitudes_deg: numpy array with latitudes (deg N) of start
        points.
    :param start_longitudes_deg: equivalent-size numpy array with longitudes
        (deg E) of start points.
    :param scalar_displacements_metres: equivalent-size numpy array of scalar
        displacements (must be non-negative).
    :param geodetic_bearings_deg: equivalent-size numpy array of geodetic
        bearings (from start point to end point, measured clockwise from due
        north; must be in 0...360).
    :return: end_latitudes_deg: equivalent-size numpy array with latitudes
        (deg N) of endpoints.
    :return: end_longitudes_deg: equivalent-size numpy array with longitudes
        (deg E) of endpoints.
    """

    error_checking.assert_is_valid_lat_numpy_array(
        start_latitudes_deg, allow_nan=False)
    start_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        start_longitudes_deg, allow_nan=False)

    # Every other input must match the shape of the latitude array.
    expected_dimensions = numpy.array(start_latitudes_deg.shape)

    error_checking.assert_is_numpy_array(
        start_longitudes_deg, exact_dimensions=expected_dimensions)

    error_checking.assert_is_geq_numpy_array(scalar_displacements_metres, 0.)
    error_checking.assert_is_numpy_array(
        scalar_displacements_metres, exact_dimensions=expected_dimensions)

    error_checking.assert_is_geq_numpy_array(geodetic_bearings_deg, 0.)
    error_checking.assert_is_leq_numpy_array(geodetic_bearings_deg, 360.)
    error_checking.assert_is_numpy_array(
        geodetic_bearings_deg, exact_dimensions=expected_dimensions)

    end_latitudes_deg = numpy.full(start_latitudes_deg.shape, numpy.nan)
    end_longitudes_deg = numpy.full(start_latitudes_deg.shape, numpy.nan)

    # Walk all arrays in flattened order, so any dimensionality is handled.
    flat_inputs = zip(
        start_latitudes_deg.flat, start_longitudes_deg.flat,
        scalar_displacements_metres.flat, geodetic_bearings_deg.flat)

    for i, (this_latitude_deg, this_longitude_deg, this_displacement_metres,
            this_bearing_deg) in enumerate(flat_inputs):

        this_start_point_object = geopy.Point(
            this_latitude_deg, this_longitude_deg)
        this_distance_object = VincentyDistance(
            meters=this_displacement_metres)
        this_end_point_object = this_distance_object.destination(
            this_start_point_object, this_bearing_deg)

        end_latitudes_deg.flat[i] = this_end_point_object.latitude
        end_longitudes_deg.flat[i] = this_end_point_object.longitude

    end_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        end_longitudes_deg, allow_nan=False)

    return end_latitudes_deg, end_longitudes_deg
def test_assert_is_non_positive_numpy_array_negative_with_nan_banned(self):
    """Tests assert_is_leq_numpy_array with base value 0 and inputs < 0.

    Here the input array contains NaN's and `allow_nan = False`, so the check
    is expected to raise a ValueError.
    """

    base_value = 0

    with self.assertRaises(ValueError):
        error_checking.assert_is_leq_numpy_array(
            NEGATIVE_NUMPY_ARRAY_WITH_NANS, base_value, allow_nan=False)
def write_ensembled_predictions(pickle_file_name, class_probability_matrix,
                                valid_times_unix_sec, narr_mask_matrix,
                                prediction_dir_name_by_model, model_weights):
    """Writes ensembled predictions to Pickle file.

    An "ensembled prediction" is an ensemble of gridded predictions from two or
    more NFA models.

    T = number of time steps
    M = number of rows in grid
    N = number of columns in grid
    C = number of classes

    :param pickle_file_name: Path to output file.
    :param class_probability_matrix: T-by-M-by-N-by-C numpy array of class
        probabilities (all values in 0...1).
    :param valid_times_unix_sec: length-T numpy array of time steps
        (integers).
    :param narr_mask_matrix: See doc for `write_gridded_predictions`.  Must be
        M-by-N.
    :param prediction_dir_name_by_model: See doc for `check_ensemble_metadata`.
    :param model_weights: Same.
    """

    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=4)

    # Mask must match the spatial (row and column) dimensions of the
    # probability matrix.
    ml_utils.check_narr_mask(narr_mask_matrix)
    these_expected_dim = numpy.array(
        class_probability_matrix.shape[1:3], dtype=int)
    error_checking.assert_is_numpy_array(
        narr_mask_matrix, exact_dimensions=these_expected_dim)

    # There must be one valid time per time step in the probability matrix.
    error_checking.assert_is_integer_numpy_array(valid_times_unix_sec)
    num_times = class_probability_matrix.shape[0]
    these_expected_dim = numpy.array([num_times], dtype=int)
    error_checking.assert_is_numpy_array(
        valid_times_unix_sec, exact_dimensions=these_expected_dim)

    check_ensemble_metadata(
        prediction_dir_name_by_model=prediction_dir_name_by_model,
        model_weights=model_weights)

    ensemble_dict = {
        CLASS_PROBABILITIES_KEY: class_probability_matrix,
        VALID_TIMES_KEY: valid_times_unix_sec,
        NARR_MASK_KEY: narr_mask_matrix,
        MODEL_DIRECTORIES_KEY: prediction_dir_name_by_model,
        MODEL_WEIGHTS_KEY: model_weights
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    # The context manager closes the file even if pickling fails part-way.
    with open(pickle_file_name, 'wb') as pickle_file_handle:
        pickle.dump(ensemble_dict, pickle_file_handle)
def plot_roc_curve(
        axes_object=None, pod_by_threshold=None, pofd_by_threshold=None,
        line_colour=DEFAULT_ROC_LINE_COLOUR,
        line_width=DEFAULT_ROC_LINE_WIDTH,
        random_line_colour=DEFAULT_ROC_RANDOM_LINE_COLOUR,
        random_line_width=DEFAULT_ROC_RANDOM_LINE_WIDTH):
    """Plots ROC (receiver operating characteristic) curve.

    T = number of binarization thresholds

    For the definition of a "binarization threshold" and the role they play in
    ROC curves, see `model_evaluation.get_points_in_roc_curve`.

    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param pod_by_threshold: length-T numpy array of POD (probability of
        detection) values.  Must be in 0...1; NaN is allowed.
    :param pofd_by_threshold: length-T numpy array of POFD (probability of
        false detection) values.  Must be in 0...1; NaN is allowed.
    :param line_colour: Colour (in any format accepted by
        `matplotlib.colors`).
    :param line_width: Line width (real positive number).
    :param random_line_colour: Colour of reference line (ROC curve for a random
        predictor).
    :param random_line_width: Width of reference line.
    """

    error_checking.assert_is_numpy_array(pod_by_threshold, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(
        pod_by_threshold, 0., allow_nan=True)
    error_checking.assert_is_leq_numpy_array(
        pod_by_threshold, 1., allow_nan=True)

    expected_dimensions = numpy.array([len(pod_by_threshold)])

    error_checking.assert_is_numpy_array(
        pofd_by_threshold, exact_dimensions=expected_dimensions)
    error_checking.assert_is_geq_numpy_array(
        pofd_by_threshold, 0., allow_nan=True)
    error_checking.assert_is_leq_numpy_array(
        pofd_by_threshold, 1., allow_nan=True)

    # Reference line (random predictor) is drawn first, beneath the curve.
    random_x_coords, random_y_coords = model_eval.get_random_roc_curve()
    axes_object.plot(
        random_x_coords, random_y_coords, color=random_line_colour,
        linestyle='dashed', linewidth=random_line_width)

    # Plot only the thresholds for which both POFD and POD are defined.
    real_flags = numpy.invert(numpy.logical_or(
        numpy.isnan(pofd_by_threshold), numpy.isnan(pod_by_threshold)
    ))
    real_indices = numpy.where(real_flags)[0]

    if len(real_indices) > 0:
        axes_object.plot(
            pofd_by_threshold[real_indices], pod_by_threshold[real_indices],
            color=line_colour, linestyle='solid', linewidth=line_width)

    axes_object.set_xlabel('POFD (probability of false detection)')
    axes_object.set_ylabel('POD (probability of detection)')
    axes_object.set_xlim(0., 1.)
    axes_object.set_ylim(0., 1.)
def _get_random_sample_points(
        num_points, for_downsized_examples, narr_mask_matrix=None):
    """Randomly samples points (without replacement) from the NARR grid.

    M = number of rows in NARR grid
    N = number of columns in NARR grid
    P = number of points sampled

    :param num_points: Number of points to sample.
    :param for_downsized_examples: Boolean flag.  If True, this method will
        sample center points for downsized images.  If False, will sample
        evaluation points from a full-size image (in which case the mask is
        ignored).
    :param narr_mask_matrix: M-by-N numpy array of integers (0 or 1).  If
        narr_mask_matrix[i, j] = 0, cell [i, j] in the full grid will never be
        sampled.  If `narr_mask_matrix is None`, any grid cell can be sampled.
    :return: row_indices: length-P numpy array with row indices of sampled
        points.
    :return: column_indices: length-P numpy array with column indices of
        sampled points.
    """

    if not for_downsized_examples:
        num_grid_rows = (
            ml_utils.LAST_NARR_ROW_FOR_FCN_INPUT -
            ml_utils.FIRST_NARR_ROW_FOR_FCN_INPUT + 1
        )
        num_grid_columns = (
            ml_utils.LAST_NARR_COLUMN_FOR_FCN_INPUT -
            ml_utils.FIRST_NARR_COLUMN_FOR_FCN_INPUT + 1
        )

        # The mask is discarded when sampling from the FCN-input subgrid.
        narr_mask_matrix = None
    else:
        grid_dimensions = nwp_model_utils.get_grid_dimensions(
            model_name=nwp_model_utils.NARR_MODEL_NAME)
        num_grid_rows, num_grid_columns = grid_dimensions

    if narr_mask_matrix is not None:
        error_checking.assert_is_integer_numpy_array(narr_mask_matrix)
        error_checking.assert_is_geq_numpy_array(narr_mask_matrix, 0)
        error_checking.assert_is_leq_numpy_array(narr_mask_matrix, 1)

        expected_dimensions = numpy.array([num_grid_rows, num_grid_columns])
        error_checking.assert_is_numpy_array(
            narr_mask_matrix, exact_dimensions=expected_dimensions)

        # Only unmasked cells (value of 1) are candidates.
        unmasked_flags = numpy.ravel(narr_mask_matrix) == 1
        possible_linear_indices = numpy.where(unmasked_flags)[0]
    else:
        num_grid_cells = num_grid_rows * num_grid_columns
        possible_linear_indices = numpy.linspace(
            0, num_grid_cells - 1, num=num_grid_cells, dtype=int)

    linear_indices = numpy.random.choice(
        possible_linear_indices, size=num_points, replace=False)

    grid_dimensions = (num_grid_rows, num_grid_columns)
    return numpy.unravel_index(linear_indices, grid_dimensions)
def get_events_in_months(desired_months, verbose, event_months=None,
                         event_times_unix_sec=None):
    """Finds events in desired months.

    If `event_months is None`, `event_times_unix_sec` will be used to compute
    the month of each event.

    :param desired_months: 1-D numpy array of desired months (range 1...12).
    :param verbose: Boolean flag.  If True, will print messages to command
        window.
    :param event_months: 1-D numpy array of event months (range 1...12).
    :param event_times_unix_sec: 1-D numpy array of event times.
    :return: desired_event_indices: 1-D numpy array with indices of events in
        desired months.
    :return: event_months: See input doc.
    """

    if event_months is None:
        error_checking.assert_is_numpy_array(
            event_times_unix_sec, num_dimensions=1)

        event_months = numpy.array([
            int(time_conversion.unix_sec_to_string(t, '%m'))
            for t in event_times_unix_sec
        ], dtype=int)

    error_checking.assert_is_integer_numpy_array(event_months)
    error_checking.assert_is_numpy_array(event_months, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(event_months, 1)
    error_checking.assert_is_leq_numpy_array(event_months, NUM_MONTHS_IN_YEAR)

    error_checking.assert_is_integer_numpy_array(desired_months)
    error_checking.assert_is_numpy_array(desired_months, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(desired_months, 1)
    error_checking.assert_is_leq_numpy_array(
        desired_months, NUM_MONTHS_IN_YEAR)

    error_checking.assert_is_boolean(verbose)

    # Vectorized membership test, instead of one Python-level `in` check per
    # event.
    desired_event_flags = numpy.isin(event_months, desired_months)
    desired_event_indices = numpy.where(desired_event_flags)[0]

    if verbose:
        print('{0:d} of {1:d} events are in months {2:s}!'.format(
            len(desired_event_indices), len(event_months),
            str(desired_months)
        ))

    return desired_event_indices, event_months
def get_random_colours(num_colours, colour_to_exclude_rgb=None,
                       min_rgb_distance=DEFAULT_MIN_RGB_DISTANCE):
    """Generates random colours, optionally keeping away from one colour.

    N = number of colours

    When a colour is excluded, 10 times as many candidates are drawn, then
    those too close to the excluded colour are thrown out.  Thus, fewer than N
    colours may be returned if too many candidates are thrown out.

    :param num_colours: Number of colours desired.
    :param colour_to_exclude_rgb: Colour to exclude (length-3 numpy array with
        values in 0...1).
    :param min_rgb_distance: All colours returned will be at least this far
        away from `colour_to_exclude_rgb`.  Distance is Euclidean.  Checked
        (must be > 0 and <= 1) only when a colour is excluded.
    :return: rgb_matrix: numpy array (at most N rows, 3 columns) with values in
        0...1.  Each row is one colour.
    """

    exclude_colour = colour_to_exclude_rgb is not None
    num_candidates = num_colours + 0

    if exclude_colour:
        error_checking.assert_is_numpy_array(
            colour_to_exclude_rgb,
            exact_dimensions=numpy.array([3], dtype=int)
        )
        error_checking.assert_is_geq_numpy_array(colour_to_exclude_rgb, 0.)
        error_checking.assert_is_leq_numpy_array(colour_to_exclude_rgb, 1.)
        error_checking.assert_is_greater(min_rgb_distance, 0.)
        error_checking.assert_is_leq(min_rgb_distance, 1.)

        # Oversample, since some candidates will be rejected.
        num_candidates = 10 * num_colours

    rgb_matrix = numpy.random.uniform(
        low=0., high=1., size=(num_candidates, 3)
    )

    if exclude_colour:
        excluded_rgb_matrix = numpy.reshape(colour_to_exclude_rgb, (1, 3))
        squared_distances = euclidean_distances(
            X=rgb_matrix, Y=excluded_rgb_matrix, squared=True
        )

        min_squared_distance = min_rgb_distance ** 2
        good_indices = numpy.where(
            squared_distances >= min_squared_distance
        )[0]
        rgb_matrix = rgb_matrix[good_indices, ...]

    num_colours_to_keep = min([num_colours, rgb_matrix.shape[0]])
    rgb_matrix = rgb_matrix[:num_colours_to_keep, ...]
    numpy.random.shuffle(rgb_matrix)

    return rgb_matrix
def check_target_array(target_array, num_dimensions, num_classes):
    """Error-checks target values.

    :param target_array: numpy array in one of two formats.
        [1] length-E integer numpy array of target values.  All values are -2
            ("dead storm") or 0...[K - 1], where K = number of classes.
        [2] E-by-K numpy array, where each value is 0 or 1.  If
            target_array[i, k] = 1, the [i]th storm object belongs to the [k]th
            class.  Classes are mutually exclusive and collectively exhaustive,
            so the sum across each row of the matrix is 1.
    :param num_dimensions: Number of dimensions expected in `target_array`
        (must be 1 or 2).
    :param num_classes: Number of classes that should be represented in
        `target_array` (must be >= 2).
    """

    error_checking.assert_is_integer(num_dimensions)
    error_checking.assert_is_geq(num_dimensions, 1)
    error_checking.assert_is_leq(num_dimensions, 2)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    num_examples = target_array.shape[0]

    if num_dimensions == 2:
        error_checking.assert_is_geq_numpy_array(target_array, 0)
        error_checking.assert_is_leq_numpy_array(target_array, 1)

        expected_dimensions = numpy.array(
            [num_examples, num_classes], dtype=int)
        error_checking.assert_is_numpy_array(
            target_array, exact_dimensions=expected_dimensions)
        return

    error_checking.assert_is_integer_numpy_array(target_array)

    expected_dimensions = numpy.array([num_examples], dtype=int)
    error_checking.assert_is_numpy_array(
        target_array, exact_dimensions=expected_dimensions)

    # TODO(thunderhoser): This is a HACK.  Should do better input-checking.
    # The lower bound below accepts every integer from the dead-storm value up
    # to K - 1.  A stricter check would require values other than the
    # dead-storm value to be >= 0.
    error_checking.assert_is_geq_numpy_array(
        target_array, target_val_utils.DEAD_STORM_INTEGER)
    error_checking.assert_is_less_than_numpy_array(target_array, num_classes)
def check_time_separation(unix_times_sec, early_indices=None,
                          late_indices=None,
                          time_separation_sec=DEFAULT_TIME_SEPARATION_SEC):
    """Ensures that there is a separation (buffer) between two sets of times.

    :param unix_times_sec: See documentation for _apply_time_separation.
    :param early_indices: See documentation for _apply_time_separation.
    :param late_indices: See documentation for _apply_time_separation.
    :param time_separation_sec: See documentation for _apply_time_separation.
    :raises: ValueError: if separation between sets is < `time_separation_sec`.
    """

    error_checking.assert_is_integer_numpy_array(unix_times_sec)
    error_checking.assert_is_numpy_array_without_nan(unix_times_sec)
    error_checking.assert_is_numpy_array(unix_times_sec, num_dimensions=1)

    max_index = len(unix_times_sec) - 1

    # Both index sets must be 1-D integer arrays that index `unix_times_sec`.
    for these_indices in (early_indices, late_indices):
        error_checking.assert_is_integer_numpy_array(these_indices)
        error_checking.assert_is_numpy_array(these_indices, num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(these_indices, 0)
        error_checking.assert_is_leq_numpy_array(these_indices, max_index)

    early_times_unix_sec = unix_times_sec[early_indices]
    late_times_unix_sec = unix_times_sec[late_indices]
    last_early_time_unix_sec = numpy.max(early_times_unix_sec)

    # Every late time must come after every early time.
    error_checking.assert_is_greater_numpy_array(
        late_times_unix_sec, last_early_time_unix_sec)

    error_checking.assert_is_integer(time_separation_sec)
    error_checking.assert_is_greater(time_separation_sec, 0)

    first_late_time_unix_sec = numpy.min(late_times_unix_sec)
    min_diff_between_sets_sec = (
        first_late_time_unix_sec - last_early_time_unix_sec
    )

    if min_diff_between_sets_sec >= time_separation_sec:
        return

    last_early_time_string = time_conversion.unix_sec_to_string(
        last_early_time_unix_sec, TIME_STRING_FORMAT)
    first_late_time_string = time_conversion.unix_sec_to_string(
        first_late_time_unix_sec, TIME_STRING_FORMAT)

    error_string = ''.join([
        'Last time in early set is ', last_early_time_string,
        '.  First time in late set is ', first_late_time_string,
        '.  This is a time separation of ', str(min_diff_between_sets_sec),
        ' seconds between sets.  Required separation is >= ',
        str(time_separation_sec), ' s.'
    ])
    raise ValueError(error_string)
def _polygons_to_mask_one_panel(polygon_objects_grid_coords, num_grid_rows,
                                num_grid_columns):
    """Rasterizes a list of polygons into one binary mask.

    M = number of rows in grid
    N = number of columns in grid

    :param polygon_objects_grid_coords: See doc for
        `polygons_from_pixel_to_grid_coords`.
    :param num_grid_rows: Same.
    :param num_grid_columns: Same.
    :return: mask_matrix: M-by-N numpy array of Boolean flags.  If
        mask_matrix[i, j] == True, grid point [i, j] is in/on at least one of
        the polygons.
    """

    mask_matrix = numpy.full(
        (num_grid_rows, num_grid_columns), False, dtype=bool)

    # Vertices may extend half a grid cell beyond the outermost grid points.
    max_column_coord = num_grid_columns - 0.5
    max_row_coord = num_grid_rows - 0.5

    # TODO(thunderhoser): This triple for-loop is probably inefficient.
    for k in range(len(polygon_objects_grid_coords)):
        this_polygon_object = polygon_objects_grid_coords[k]
        these_x_coords, these_y_coords = this_polygon_object.exterior.xy

        these_grid_columns = numpy.array(these_x_coords)
        error_checking.assert_is_geq_numpy_array(these_grid_columns, -0.5)
        error_checking.assert_is_leq_numpy_array(
            these_grid_columns, max_column_coord)

        these_grid_rows = numpy.array(these_y_coords)
        error_checking.assert_is_geq_numpy_array(these_grid_rows, -0.5)
        error_checking.assert_is_leq_numpy_array(
            these_grid_rows, max_row_coord)

        for i in range(num_grid_rows):
            for j in range(num_grid_columns):
                # Grid points already inside an earlier polygon stay flagged.
                if not mask_matrix[i, j]:
                    mask_matrix[i, j] = polygons.point_in_or_on_polygon(
                        polygon_object=this_polygon_object,
                        query_x_coordinate=j, query_y_coordinate=i)

    return mask_matrix
def plot_narr_grid(
        probability_matrix, front_string_id, axes_object, basemap_object,
        first_row_in_narr_grid=0, first_column_in_narr_grid=0,
        opacity=DEFAULT_GRID_OPACITY):
    """Plots frontal-probability map on NARR grid.

    This method plots data over a contiguous subset of the NARR grid, which
    need not be *strictly* a subset.  In other words, the "subset" could be the
    full NARR grid.

    M = number of rows (unique grid-point y-coordinates)
    N = number of columns (unique grid-point x-coordinates)

    :param probability_matrix: M-by-N numpy array, where
        probability_matrix[i, j] is the predicted probability of a front
        passing through grid cell [i, j].  All values must be in 0...1, with no
        NaN.
    :param front_string_id: Type of fronts predicted in `probability_matrix`.
        May be "warm", "cold", or "any".
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap`.
    :param first_row_in_narr_grid: Row 0 in the subgrid is row
        `first_row_in_narr_grid` in the full NARR grid.
    :param first_column_in_narr_grid: Column 0 in the subgrid is column
        `first_column_in_narr_grid` in the full NARR grid.
    :param opacity: Opacity for colour map (in range 0...1).
    """

    error_checking.assert_is_numpy_array(probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(
        probability_matrix, 0., allow_nan=False)
    error_checking.assert_is_leq_numpy_array(
        probability_matrix, 1., allow_nan=False)

    _check_front_type(front_string_id)

    # Any front type without an entry here uses the cold-front colour map.
    front_type_to_colour_map_function = {
        ANY_FRONT_STRING_ID: get_any_front_colour_map,
        front_utils.WARM_FRONT_STRING_ID: get_warm_front_colour_map
    }
    get_colour_map = front_type_to_colour_map_function.get(
        front_string_id, get_cold_front_colour_map)

    colour_map_object, _, colour_bounds = get_colour_map()

    # Colour limits are the second and second-last bounds.
    narr_plotting.plot_xy_grid(
        data_matrix=probability_matrix, axes_object=axes_object,
        basemap_object=basemap_object, colour_map=colour_map_object,
        colour_minimum=colour_bounds[1], colour_maximum=colour_bounds[-2],
        first_row_in_narr_grid=first_row_in_narr_grid,
        first_column_in_narr_grid=first_column_in_narr_grid, opacity=opacity)
def start_points_and_distances_and_bearings_to_endpoints(
        start_latitudes_deg=None, start_longitudes_deg=None,
        displacements_metres=None, geodetic_bearings_deg=None):
    """Finds the end point for each start point, displacement, and bearing.

    P = number of start points

    Start longitudes are passed through
    `lng_conversion.convert_lng_positive_in_west` before use, and so are the
    end longitudes before they are returned.

    :param start_latitudes_deg: length-P numpy array of beginning latitudes
        (deg N).  NaN is not allowed.
    :param start_longitudes_deg: length-P numpy array of beginning longitudes
        (deg E).  NaN is not allowed.
    :param displacements_metres: length-P numpy array of displacements (must
        all be >= 0).
    :param geodetic_bearings_deg: length-P numpy array of geodetic bearings
        (from start point towards end point, measured clockwise from due
        north).  Must all be in 0...360.
    :return: end_latitudes_deg: length-P numpy array of end latitudes (deg N).
    :return: end_longitudes_deg: length-P numpy array of end longitudes
        (deg E).
    """
    error_checking.assert_is_valid_lat_numpy_array(
        start_latitudes_deg, allow_nan=False)
    error_checking.assert_is_numpy_array(start_latitudes_deg, num_dimensions=1)

    # Every other input must have the same length as the latitude array.
    num_points = len(start_latitudes_deg)
    expected_dimensions = numpy.array([num_points])

    start_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        start_longitudes_deg, allow_nan=False)
    error_checking.assert_is_numpy_array(
        start_longitudes_deg, exact_dimensions=expected_dimensions)

    error_checking.assert_is_geq_numpy_array(displacements_metres, 0.)
    error_checking.assert_is_numpy_array(
        displacements_metres, exact_dimensions=expected_dimensions)

    error_checking.assert_is_geq_numpy_array(geodetic_bearings_deg, 0.)
    error_checking.assert_is_leq_numpy_array(geodetic_bearings_deg, 360.)
    error_checking.assert_is_numpy_array(
        geodetic_bearings_deg, exact_dimensions=expected_dimensions)

    end_latitudes_deg = numpy.full(num_points, numpy.nan)
    end_longitudes_deg = numpy.full(num_points, numpy.nan)

    point_iterator = zip(
        start_latitudes_deg, start_longitudes_deg, displacements_metres,
        geodetic_bearings_deg)

    # The geodesic is solved one point at a time.
    for k, (this_latitude_deg, this_longitude_deg, this_displacement_metres,
            this_bearing_deg) in enumerate(point_iterator):
        start_point_object = geopy.Point(this_latitude_deg, this_longitude_deg)
        end_point_object = VincentyDistance(
            meters=this_displacement_metres
        ).destination(start_point_object, this_bearing_deg)

        end_latitudes_deg[k] = end_point_object.latitude
        end_longitudes_deg[k] = end_point_object.longitude

    end_longitudes_deg = lng_conversion.convert_lng_positive_in_west(
        end_longitudes_deg, allow_nan=False)
    return end_latitudes_deg, end_longitudes_deg
def test_get_rotations(self):
    """Ensures correct output from get_rotations.

    Checks that the number of angles returned equals NUM_ROTATIONS and that
    every absolute angle lies between the minimum and maximum absolute
    rotation angles declared in `data_augmentation`.
    """
    these_ccw_rotation_angles_deg = data_augmentation.get_rotations(
        num_rotations=NUM_ROTATIONS,
        max_absolute_rotation_angle_deg=MAX_ABSOLUTE_ROTATION_ANGLE_DEG)

    # assertEqual reports both lengths on failure; assertTrue(a == b) would
    # only report "False is not true".
    self.assertEqual(len(these_ccw_rotation_angles_deg), NUM_ROTATIONS)

    these_absolute_angles_deg = numpy.absolute(these_ccw_rotation_angles_deg)

    error_checking.assert_is_geq_numpy_array(
        these_absolute_angles_deg,
        data_augmentation.MIN_ABSOLUTE_ROTATION_ANGLE_DEG)
    error_checking.assert_is_leq_numpy_array(
        these_absolute_angles_deg,
        data_augmentation.MAX_ABSOLUTE_ROTATION_ANGLE_DEG)
def test_get_noisings(self):
    """Ensures correct output from get_noisings.

    Checks that the number of standard deviations returned equals NUM_NOISINGS
    and that each one lies between the minimum and maximum noise standard
    deviations declared in `data_augmentation`.
    """
    these_standard_deviations = data_augmentation.get_noisings(
        num_noisings=NUM_NOISINGS,
        max_standard_deviation=MAX_NOISE_STANDARD_DEVIATION)

    # assertEqual reports both lengths on failure; assertTrue(a == b) would
    # only report "False is not true".
    self.assertEqual(len(these_standard_deviations), NUM_NOISINGS)

    error_checking.assert_is_geq_numpy_array(
        these_standard_deviations,
        data_augmentation.MIN_NOISE_STANDARD_DEVIATION)
    error_checking.assert_is_leq_numpy_array(
        these_standard_deviations,
        data_augmentation.MAX_NOISE_STANDARD_DEVIATION)
def rotate_winds(u_winds_grid_relative_m_s01=None,
                 v_winds_grid_relative_m_s01=None,
                 rotation_angle_cosines=None, rotation_angle_sines=None):
    """Converts grid-relative wind vectors to Earth-relative wind vectors.

    With alpha denoting the rotation angle, the transformation is:

    u_Earth = u_grid * cos(alpha) + v_grid * sin(alpha)
    v_Earth = v_grid * cos(alpha) - u_grid * sin(alpha)

    :param u_winds_grid_relative_m_s01: numpy array of grid-relative u-winds
        (towards positive x-direction).
    :param v_winds_grid_relative_m_s01: equivalent-shape numpy array of
        grid-relative v-winds (towards positive y-direction).
    :param rotation_angle_cosines: equivalent-shape numpy array with cosines
        of rotation angles (each in -1...1).
    :param rotation_angle_sines: equivalent-shape numpy array with sines of
        rotation angles (each in -1...1).
    :return: u_winds_earth_relative_m_s01: equivalent-shape numpy array of
        Earth-relative (eastward) u-winds.
    :return: v_winds_earth_relative_m_s01: equivalent-shape numpy array of
        Earth-relative (northward) v-winds.
    """
    error_checking.assert_is_real_numpy_array(u_winds_grid_relative_m_s01)

    # The u-wind array sets the shape that every other input must match.
    expected_dimensions = numpy.asarray(u_winds_grid_relative_m_s01.shape)

    error_checking.assert_is_real_numpy_array(v_winds_grid_relative_m_s01)
    error_checking.assert_is_numpy_array(
        v_winds_grid_relative_m_s01, exact_dimensions=expected_dimensions)

    for this_trig_array in (rotation_angle_cosines, rotation_angle_sines):
        error_checking.assert_is_geq_numpy_array(this_trig_array, -1)
        error_checking.assert_is_leq_numpy_array(this_trig_array, 1)
        error_checking.assert_is_numpy_array(
            this_trig_array, exact_dimensions=expected_dimensions)

    u_grid = u_winds_grid_relative_m_s01
    v_grid = v_winds_grid_relative_m_s01

    u_winds_earth_relative_m_s01 = (
        rotation_angle_cosines * u_grid + rotation_angle_sines * v_grid)
    v_winds_earth_relative_m_s01 = (
        rotation_angle_cosines * v_grid - rotation_angle_sines * u_grid)

    return u_winds_earth_relative_m_s01, v_winds_earth_relative_m_s01
def plot_narr_grid(frontal_grid_matrix, axes_object, basemap_object,
                   first_row_in_narr_grid=0, first_column_in_narr_grid=0,
                   opacity=DEFAULT_GRID_OPACITY):
    """Draws the NARR grid cells that are intersected by a front.

    The plotted domain is a contiguous block of the NARR grid.  It need not be
    a *strict* subset, i.e., it may be the full NARR grid.  Grid cells equal to
    `front_utils.NO_FRONT_INTEGER_ID` are masked out before plotting.

    :param frontal_grid_matrix: See documentation for
        `front_utils.frontal_grid_to_points`.  Must be a 2-D integer numpy
        array with all values between the smallest and largest entries of
        `front_utils.VALID_INTEGER_IDS`.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap`.
    :param first_row_in_narr_grid: Row 0 in the subgrid is row
        `first_row_in_narr_grid` in the full NARR grid.
    :param first_column_in_narr_grid: Column 0 in the subgrid is column
        `first_column_in_narr_grid` in the full NARR grid.
    :param opacity: Opacity for colour map (in range 0...1).
    """
    error_checking.assert_is_integer_numpy_array(frontal_grid_matrix)
    error_checking.assert_is_numpy_array(frontal_grid_matrix, num_dimensions=2)

    smallest_valid_id = numpy.min(front_utils.VALID_INTEGER_IDS)
    error_checking.assert_is_geq_numpy_array(
        frontal_grid_matrix, smallest_valid_id)

    largest_valid_id = numpy.max(front_utils.VALID_INTEGER_IDS)
    error_checking.assert_is_leq_numpy_array(
        frontal_grid_matrix, largest_valid_id)

    colour_map_object, _, colour_bounds = get_colour_map_for_grid()

    # Mask out the "no front" cells before handing the grid to the plotter.
    no_front_flag_matrix = (
        frontal_grid_matrix == front_utils.NO_FRONT_INTEGER_ID)
    masked_grid_matrix = numpy.ma.masked_where(
        no_front_flag_matrix, frontal_grid_matrix)

    # The colour limits are the second and second-last entries of the bounds.
    narr_plotting.plot_xy_grid(
        data_matrix=masked_grid_matrix,
        axes_object=axes_object,
        basemap_object=basemap_object,
        colour_map=colour_map_object,
        colour_minimum=colour_bounds[1],
        colour_maximum=colour_bounds[-2],
        first_row_in_narr_grid=first_row_in_narr_grid,
        first_column_in_narr_grid=first_column_in_narr_grid,
        opacity=opacity)
def plot_attributes_diagram(figure_object, axes_object, mean_forecast_by_bin,
                            event_frequency_by_bin, num_examples_by_bin):
    """Draws an attributes diagram (Hsu and Murphy 1986).

    The diagram consists of a background (drawn from the climatology), an
    inset histogram of example counts, and the reliability curve.  Climatology
    is the mean event frequency over non-empty bins, weighted by the number of
    examples in each bin.

    :param figure_object: Instance of `matplotlib.figure.Figure`.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param mean_forecast_by_bin: See doc for `plot_reliability_curve`.  Its
        length defines the expected number of bins.
    :param event_frequency_by_bin: Same.  Must be a 1-D numpy array with values
        in 0...1; NaN is allowed only for bins with no examples.
    :param num_examples_by_bin: See doc for
        `_plot_inset_histogram_for_attributes_diagram`.  Must be an integer
        numpy array with one non-negative value per bin.
    """
    error_checking.assert_is_numpy_array(
        event_frequency_by_bin, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(
        event_frequency_by_bin, 0., allow_nan=True)
    error_checking.assert_is_leq_numpy_array(
        event_frequency_by_bin, 1., allow_nan=True)

    expected_dimensions = numpy.array([len(mean_forecast_by_bin)], dtype=int)

    error_checking.assert_is_integer_numpy_array(num_examples_by_bin)
    error_checking.assert_is_numpy_array(
        num_examples_by_bin, exact_dimensions=expected_dimensions)
    error_checking.assert_is_geq_numpy_array(num_examples_by_bin, 0)

    # Only bins that contain examples contribute to the climatology, and their
    # event frequencies must be defined.
    populated_bin_indices = numpy.where(num_examples_by_bin > 0)[0]
    populated_event_frequencies = event_frequency_by_bin[populated_bin_indices]
    error_checking.assert_is_numpy_array_without_nan(
        populated_event_frequencies)

    climatology = numpy.average(
        populated_event_frequencies,
        weights=num_examples_by_bin[populated_bin_indices])

    _plot_background_of_attributes_diagram(
        axes_object=axes_object, climatology=climatology)

    _plot_inset_histogram_for_attributes_diagram(
        figure_object=figure_object, num_examples_by_bin=num_examples_by_bin)

    plot_reliability_curve(
        axes_object=axes_object,
        mean_forecast_by_bin=mean_forecast_by_bin,
        event_frequency_by_bin=event_frequency_by_bin)
def event_probs_to_multiclass(event_probabilities):
    """Expands 1-D event probabilities into a two-class probability matrix.

    E = number of examples

    :param event_probabilities: length-E numpy array of event probabilities
        (each in 0...1).
    :return: class_probability_matrix: E-by-2 numpy array, where the first
        column contains probabilities of non-event and the second column
        contains probabilities of event.
    """
    error_checking.assert_is_numpy_array(event_probabilities, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(event_probabilities, 0.)
    error_checking.assert_is_leq_numpy_array(event_probabilities, 1.)

    # Turn the length-E vector into an E-by-1 column.
    event_prob_column = event_probabilities[:, numpy.newaxis]
    non_event_prob_column = 1. - event_prob_column

    return numpy.concatenate(
        (non_event_prob_column, event_prob_column), axis=1)
def _check_architecture_args(option_dict):
    """Validates the architecture options, after filling in defaults.

    Any key missing from the input takes its value from
    `DEFAULT_ARCHITECTURE_OPTION_DICT`.  The input dictionary itself is not
    modified.

    :param option_dict: See doc for `create_model`.
    :return: option_dict: Same as input, except defaults may have been added.
    """
    user_option_dict = option_dict.copy()
    option_dict = DEFAULT_ARCHITECTURE_OPTION_DICT.copy()
    option_dict.update(user_option_dict)

    num_inputs = option_dict[NUM_INPUTS_KEY]
    error_checking.assert_is_integer(num_inputs)
    error_checking.assert_is_geq(num_inputs, 10)

    neuron_counts = option_dict[DENSE_LAYER_NEURON_NUMS_KEY]
    error_checking.assert_is_integer_numpy_array(neuron_counts)
    error_checking.assert_is_numpy_array(neuron_counts, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(neuron_counts, 1)

    # There must be exactly one dropout rate per dense layer.
    expected_dimensions = numpy.array([len(neuron_counts)], dtype=int)

    dropout_rates = option_dict[DENSE_LAYER_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(
        dropout_rates, exact_dimensions=expected_dimensions)
    error_checking.assert_is_leq_numpy_array(
        dropout_rates, 1., allow_nan=True)

    for this_weight_key in (L1_WEIGHT_KEY, L2_WEIGHT_KEY):
        error_checking.assert_is_geq(option_dict[this_weight_key], 0.)

    for this_flag_key in (USE_BATCH_NORM_KEY, ZERO_OUT_TOP_HR_KEY):
        error_checking.assert_is_boolean(option_dict[this_flag_key])

    # The top heating-rate index is validated only when zeroing-out is on.
    if not option_dict[ZERO_OUT_TOP_HR_KEY]:
        return option_dict

    top_heating_rate_index = option_dict[TOP_HEATING_RATE_INDEX_KEY]
    error_checking.assert_is_integer(top_heating_rate_index)
    error_checking.assert_is_geq(top_heating_rate_index, 0)

    return option_dict
def __init__(self, binary_region_matrix):
    """Creates new instance.

    M = number of rows in grid
    N = number of columns in grid

    Stores the grid dimensions under the attribute names given by
    `NUM_GRID_ROWS_KEY` and `NUM_GRID_COLUMNS_KEY`, plus the row and column
    indices of all grid cells in the region.

    :param binary_region_matrix: M-by-N numpy array of integers in 0...1.  If
        binary_region_matrix[i, j] = 1, grid cell [i, j] is part of the
        connected region.
    """
    error_checking.assert_is_numpy_array(
        binary_region_matrix, num_dimensions=2)
    error_checking.assert_is_integer_numpy_array(binary_region_matrix)
    error_checking.assert_is_geq_numpy_array(binary_region_matrix, 0)
    error_checking.assert_is_leq_numpy_array(binary_region_matrix, 1)

    # The matrix is known to be 2-D here, so its shape unpacks into two values.
    num_grid_rows, num_grid_columns = binary_region_matrix.shape
    setattr(self, NUM_GRID_ROWS_KEY, num_grid_rows)
    setattr(self, NUM_GRID_COLUMNS_KEY, num_grid_columns)

    rows_in_region, columns_in_region = numpy.where(binary_region_matrix == 1)
    self.row_indices_in_region = rows_in_region
    self.column_indices_in_region = columns_in_region