def test_get_binarization_thresholds_bad_input(self):
    """Ensures correct output from get_binarization_thresholds.

    In this case, input `threshold_arg` is invalid.
    """

    with self.assertRaises(ValueError):
        model_eval.get_binarization_thresholds(
            threshold_arg=FAKE_THRESHOLD_ARG)
def test_get_binarization_thresholds_from_number(self):
    """Ensures correct output from get_binarization_thresholds.

    In this case, only the number of thresholds is input directly.
    """

    these_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=NUM_THRESHOLDS_FOR_INPUT)

    self.assertTrue(numpy.allclose(
        these_thresholds, THRESHOLDS_FROM_NUMBER, atol=TOLERANCE
    ))
def test_get_binarization_thresholds_direct_input(self):
    """Ensures correct output from get_binarization_thresholds.

    In this case, desired thresholds are input directly.
    """

    these_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=THRESHOLDS_FROM_DIRECT_INPUT)

    self.assertTrue(numpy.allclose(
        these_thresholds, THRESHOLDS_FROM_DIRECT_INPUT, atol=TOLERANCE
    ))
def test_get_binarization_thresholds_from_unique_forecasts(self):
    """Ensures correct output from get_binarization_thresholds.

    In this case, binarization thresholds are determined from unique
    forecasts.
    """

    these_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
        forecast_probabilities=FORECAST_PROBS_FOR_THRESHOLDS,
        forecast_precision=FORECAST_PRECISION)

    self.assertTrue(numpy.allclose(
        these_thresholds, THRESHOLDS_FROM_UNIQUE_FORECASTS, atol=TOLERANCE
    ))
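
# A hedged usage sketch of the three threshold-selection modes exercised by
# the tests above.  Argument names and THRESHOLD_ARG_FOR_UNIQUE_FORECASTS come
# from the test calls themselves; the import path and example values are
# assumptions for illustration only.

import numpy

from gewittergefahr.gg_utils import model_evaluation as model_eval

# Mode 1: pass the desired number of thresholds.
thresholds_from_number = model_eval.get_binarization_thresholds(
    threshold_arg=11)

# Mode 2: pass the desired thresholds themselves.
thresholds_from_direct_input = model_eval.get_binarization_thresholds(
    threshold_arg=numpy.array([0., 0.25, 0.5, 0.75, 1.]))

# Mode 3: derive thresholds from the unique forecast probabilities, rounded
# to the given precision.
thresholds_from_unique_forecasts = model_eval.get_binarization_thresholds(
    threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
    forecast_probabilities=numpy.array([0.1, 0.1, 0.25, 0.75, 0.9]),
    forecast_precision=0.01)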
def find_best_binarization_threshold(
        class_probability_matrix, observed_labels, threshold_arg,
        criterion_function, optimization_direction=MAX_OPTIMIZATION_DIRECTION,
        forecast_precision_for_thresholds=DEFAULT_FORECAST_PRECISION):
    """Finds the best binarization threshold.

    A "binarization threshold" is used to determinize probabilistic (either
    binary or multi-class) predictions, using the following procedure, where
    f* = binarization threshold and f_0 = forecast probability of class 0
    (no front).

    [1] If f_0 >= f*, predict no front.
    [2] If f_0 < f*, predict a front.  In multi-class problems, frontal type
        (warm or cold) is determined by whichever of the non-zero classes has
        the highest predicted probability.

    In the following definitions, P = number of evaluation pairs and
    K = number of classes.

    :param class_probability_matrix: See documentation for
        `check_evaluation_pairs`.
    :param observed_labels: See doc for `check_evaluation_pairs`.
    :param threshold_arg: See documentation for
        `model_evaluation.get_binarization_thresholds`.  Determines which
        thresholds will be tried.
    :param criterion_function: Criterion to be either minimized or maximized.
        This must be a function that takes input
        `contingency_table_as_matrix` and returns a single float.  See
        `get_gerrity_score` in this module for an example.
    :param optimization_direction: Direction in which criterion function is
        optimized.  Options are "min" and "max".
    :param forecast_precision_for_thresholds: See documentation for
        `model_evaluation.get_binarization_thresholds`.  Determines which
        thresholds will be tried.
    :return: best_threshold: Best binarization threshold.
    :return: best_criterion_value: Value of criterion function at said
        threshold.
    """

    check_evaluation_pairs(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels)

    error_checking.assert_is_string(optimization_direction)

    if optimization_direction not in VALID_OPTIMIZATION_DIRECTIONS:
        error_string = (
            '\n\n{0:s}\nValid optimization directions (listed above) do not '
            'include "{1:s}".'
        ).format(str(VALID_OPTIMIZATION_DIRECTIONS), optimization_direction)

        raise ValueError(error_string)

    possible_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=threshold_arg,
        forecast_probabilities=class_probability_matrix[:, 0],
        forecast_precision=forecast_precision_for_thresholds)

    num_thresholds = len(possible_thresholds)
    criterion_values = numpy.full(num_thresholds, numpy.nan)

    # Old binary-only implementation, kept here for reference:
    #
    # for i in range(num_thresholds):
    #     these_predicted_labels = model_eval.binarize_forecast_probs(
    #         forecast_probabilities=class_probability_matrix[:, 0],
    #         binarization_threshold=possible_thresholds[i])
    #
    #     these_predicted_labels = numpy.invert(
    #         these_predicted_labels.astype(bool)
    #     ).astype(int)
    #
    #     this_contingency_table_as_dict = model_eval.get_contingency_table(
    #         forecast_labels=these_predicted_labels,
    #         observed_labels=(observed_labels > 0).astype(int))
    #
    #     criterion_values[i] = criterion_function(
    #         this_contingency_table_as_dict)

    for i in range(num_thresholds):
        these_predicted_labels = determinize_probabilities(
            class_probability_matrix=class_probability_matrix,
            binarization_threshold=possible_thresholds[i])

        this_contingency_table_as_matrix = get_contingency_table(
            predicted_labels=these_predicted_labels,
            observed_labels=observed_labels,
            num_classes=class_probability_matrix.shape[1])

        criterion_values[i] = criterion_function(
            this_contingency_table_as_matrix)

    if optimization_direction == MAX_OPTIMIZATION_DIRECTION:
        best_criterion_value = numpy.nanmax(criterion_values)
        best_threshold = possible_thresholds[
            numpy.nanargmax(criterion_values)]
    else:
        best_criterion_value = numpy.nanmin(criterion_values)
        best_threshold = possible_thresholds[
            numpy.nanargmin(criterion_values)]

    return best_threshold, best_criterion_value
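
# `determinize_probabilities` and `get_contingency_table` are defined
# elsewhere in this module.  Below is a minimal sketch of the determinization
# procedure described in the docstring above; it is an illustration written
# for this note, not the module's actual implementation.

import numpy

def _determinize_probabilities_sketch(class_probability_matrix,
                                      binarization_threshold):
    """Determinizes class probabilities into predicted labels.

    If f_0 >= f* (the binarization threshold), predict class 0 (no front);
    otherwise predict whichever non-zero class has the highest probability.
    """
    num_examples = class_probability_matrix.shape[0]
    predicted_labels = numpy.full(num_examples, 0, dtype=int)

    # Examples whose no-front probability falls below the threshold are
    # assigned the most probable non-zero class.
    front_flags = class_probability_matrix[:, 0] < binarization_threshold
    predicted_labels[front_flags] = 1 + numpy.argmax(
        class_probability_matrix[front_flags, 1:], axis=1)

    return predicted_labels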
def _compute_scores(
        forecast_probabilities, observed_labels, num_bootstrap_reps,
        output_file_name, best_prob_threshold=None, downsampling_dict=None):
    """Computes evaluation scores.

    E = number of examples (storm objects)

    :param forecast_probabilities: length-E numpy array of forecast event
        probabilities.
    :param observed_labels: length-E numpy array of observations (1 for
        event, 0 for non-event).
    :param num_bootstrap_reps: Number of bootstrap replicates.
    :param output_file_name: Path to output file (will be written by
        `model_evaluation.write_evaluation`).
    :param best_prob_threshold: Best probability threshold.  If None, will be
        determined on the fly.
    :param downsampling_dict: Dictionary with downsampling fractions.  See
        doc for `deep_learning_utils.sample_by_class`.  If this is None,
        downsampling will not be used.
    """

    num_examples = len(observed_labels)
    num_examples_by_class = numpy.unique(
        observed_labels, return_counts=True
    )[-1]

    print('Number of examples by class (no downsampling): {0:s}'.format(
        str(num_examples_by_class)
    ))

    positive_example_indices = numpy.where(observed_labels == 1)[0]
    negative_example_indices = numpy.where(observed_labels == 0)[0]

    if downsampling_dict is None:
        these_indices = numpy.linspace(
            0, num_examples - 1, num=num_examples, dtype=int)
    else:
        these_indices = dl_utils.sample_by_class(
            sampling_fraction_by_class_dict=downsampling_dict,
            target_name=DUMMY_TARGET_NAME,
            target_values=observed_labels, num_examples_total=num_examples)

        this_num_ex_by_class = numpy.unique(
            observed_labels[these_indices], return_counts=True
        )[-1]

        print((
            'Number of examples by class (after downsampling): {0:s}'
        ).format(str(this_num_ex_by_class)))

    all_prob_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
        forecast_probabilities=forecast_probabilities[these_indices],
        forecast_precision=FORECAST_PRECISION)

    if best_prob_threshold is None:
        best_prob_threshold, best_csi = (
            model_eval.find_best_binarization_threshold(
                forecast_probabilities=forecast_probabilities[these_indices],
                observed_labels=observed_labels[these_indices],
                threshold_arg=all_prob_thresholds,
                criterion_function=model_eval.get_csi,
                optimization_direction=model_eval.MAX_OPTIMIZATION_STRING)
        )
    else:
        these_forecast_labels = model_eval.binarize_forecast_probs(
            forecast_probabilities=forecast_probabilities[these_indices],
            binarization_threshold=best_prob_threshold)

        this_contingency_dict = model_eval.get_contingency_table(
            forecast_labels=these_forecast_labels,
            observed_labels=observed_labels[these_indices])

        best_csi = model_eval.get_csi(this_contingency_dict)

    print((
        'Best probability threshold = {0:.4f} ... corresponding CSI = {1:.4f}'
    ).format(best_prob_threshold, best_csi))

    num_examples_by_forecast_bin = model_eval.get_points_in_reliability_curve(
        forecast_probabilities=forecast_probabilities[these_indices],
        observed_labels=observed_labels[these_indices],
        num_forecast_bins=model_eval.DEFAULT_NUM_RELIABILITY_BINS
    )[-1]

    list_of_evaluation_tables = []

    for i in range(num_bootstrap_reps):
        print('Computing scores for bootstrap replicate {0:d} of {1:d}...'
              .format(i + 1, num_bootstrap_reps))

        if num_bootstrap_reps == 1:
            if downsampling_dict is None:
                these_indices = numpy.linspace(
                    0, num_examples - 1, num=num_examples, dtype=int)
            else:
                these_indices = dl_utils.sample_by_class(
                    sampling_fraction_by_class_dict=downsampling_dict,
                    target_name=DUMMY_TARGET_NAME,
                    target_values=observed_labels,
                    num_examples_total=num_examples)
        else:
            if len(positive_example_indices) > 0:
                these_positive_indices = bootstrapping.draw_sample(
                    positive_example_indices)[0]
            else:
                these_positive_indices = numpy.array([], dtype=int)

            these_negative_indices = bootstrapping.draw_sample(
                negative_example_indices)[0]
            these_indices = numpy.concatenate((
                these_positive_indices, these_negative_indices))

            if downsampling_dict is not None:
                these_subindices = dl_utils.sample_by_class(
                    sampling_fraction_by_class_dict=downsampling_dict,
                    target_name=DUMMY_TARGET_NAME,
                    target_values=observed_labels[these_indices],
                    num_examples_total=num_examples)

                these_indices = these_indices[these_subindices]

        if downsampling_dict is not None:
            this_num_ex_by_class = numpy.unique(
                observed_labels[these_indices], return_counts=True
            )[-1]

            print('Number of examples by class: {0:s}'.format(
                str(this_num_ex_by_class)
            ))

        this_evaluation_table = model_eval.run_evaluation(
            forecast_probabilities=forecast_probabilities[these_indices],
            observed_labels=observed_labels[these_indices],
            best_prob_threshold=best_prob_threshold,
            all_prob_thresholds=all_prob_thresholds,
            climatology=numpy.mean(observed_labels[these_indices]))

        list_of_evaluation_tables.append(this_evaluation_table)

        if i == num_bootstrap_reps - 1:
            print(SEPARATOR_STRING)
        else:
            print(MINOR_SEPARATOR_STRING)

        if i == 0:
            continue

        list_of_evaluation_tables[-1] = list_of_evaluation_tables[-1].align(
            list_of_evaluation_tables[0], axis=1
        )[0]

    evaluation_table = pandas.concat(
        list_of_evaluation_tables, axis=0, ignore_index=True)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    model_eval.write_evaluation(
        pickle_file_name=output_file_name,
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        best_prob_threshold=best_prob_threshold,
        all_prob_thresholds=all_prob_thresholds,
        num_examples_by_forecast_bin=num_examples_by_forecast_bin,
        downsampling_dict=downsampling_dict,
        evaluation_table=evaluation_table)
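
# `bootstrapping.draw_sample` (used in the loop above) returns, as its first
# element, a sample drawn with replacement from the input array.  Below is a
# minimal numpy-only sketch of the behaviour assumed here; the real function
# may differ in details such as its second return value.

import numpy

def _draw_sample_sketch(example_indices):
    """Draws one bootstrap replicate of `example_indices`.

    Returns the resampled indices and the positions that were drawn,
    mirroring the tuple indexed with [0] above.
    """
    num_examples = len(example_indices)

    # Sampling with replacement: each position is drawn uniformly at random.
    these_positions = numpy.random.randint(
        0, high=num_examples, size=num_examples)

    return example_indices[these_positions], these_positions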