def test_get_frequency_bias(self):
    """Ensures correctness of get_frequency_bias; input values are non-zero."""

    this_frequency_bias = model_eval.get_frequency_bias(
        CONTINGENCY_TABLE_THRESHOLD_HALF)
    self.assertTrue(numpy.isclose(
        this_frequency_bias, FREQUENCY_BIAS_THRESHOLD_HALF, atol=TOLERANCE))
def test_get_frequency_bias_all_zeros(self):
    """Ensures correctness of get_frequency_bias; input values are all zero."""

    this_bias = model_eval.get_frequency_bias(CONTINGENCY_TABLE_ALL_ZEROS)
    self.assertTrue(numpy.isnan(this_bias))
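# For reference, a minimal sketch of the statistic under test: frequency
# bias = (hits + false alarms) / (hits + misses), i.e., the ratio of forecast
# "yes" events to observed "yes" events.  The dict keys used here
# ('num_hits', 'num_false_alarms', 'num_misses') are hypothetical and may
# differ from the actual keys returned by model_eval.get_contingency_table.

def _frequency_bias_sketch(contingency_dict):
    """Returns frequency bias, or NaN when no events were observed."""

    numerator = (
        contingency_dict['num_hits'] + contingency_dict['num_false_alarms'])
    denominator = (
        contingency_dict['num_hits'] + contingency_dict['num_misses'])

    if denominator == 0:
        return numpy.nan  # Matches the all-zeros test case above.
    return float(numerator) / denominator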
def run_evaluation(class_probability_matrix, observed_labels, output_dir_name):
    """Evaluates a set of multiclass probabilistic predictions.

    E = number of examples
    K = number of classes

    :param class_probability_matrix: E-by-K numpy array, where
        class_probability_matrix[i, k] = probability that the [i]th example
        belongs to the [k]th class.  Classes should be mutually exclusive and
        collectively exhaustive, so that the sum across each row is 1.0.
    :param observed_labels: length-E numpy array of observed labels.  Each
        label must be an integer from 0...(K - 1).
    :param output_dir_name: Name of output directory.  Results will be saved
        here.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print 'Finding best binarization threshold (front vs. no front)...'
    binarization_threshold, best_gerrity_score = (
        eval_utils.find_best_binarization_threshold(
            class_probability_matrix=class_probability_matrix,
            observed_labels=observed_labels,
            threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
            criterion_function=eval_utils.get_gerrity_score,
            optimization_direction=eval_utils.MAX_OPTIMIZATION_DIRECTION,
            forecast_precision_for_thresholds=FORECAST_PRECISION_FOR_THRESHOLDS
        ))

    print (
        'Best binarization threshold = {0:.4f} ... corresponding Gerrity '
        'score = {1:.4f}'
    ).format(binarization_threshold, best_gerrity_score)

    print 'Determinizing multiclass probabilities...'
    predicted_labels = eval_utils.determinize_probabilities(
        class_probability_matrix=class_probability_matrix,
        binarization_threshold=binarization_threshold)

    contingency_matrix = eval_utils.get_contingency_table(
        predicted_labels=predicted_labels, observed_labels=observed_labels,
        num_classes=class_probability_matrix.shape[1])

    print 'Multiclass contingency table is shown below:\n{0:s}'.format(
        str(contingency_matrix))
    print SEPARATOR_STRING

    accuracy = eval_utils.get_accuracy(contingency_matrix)
    peirce_score = eval_utils.get_peirce_score(contingency_matrix)
    heidke_score = eval_utils.get_heidke_score(contingency_matrix)
    gerrity_score = eval_utils.get_gerrity_score(contingency_matrix)

    print (
        'Multiclass accuracy = {0:.4f} ... Peirce score = {1:.4f} ... '
        'Heidke score = {2:.4f} ... Gerrity score = {3:.4f}\n'
    ).format(accuracy, peirce_score, heidke_score, gerrity_score)

    binary_contingency_dict = model_eval.get_contingency_table(
        forecast_labels=(predicted_labels > 0).astype(int),
        observed_labels=(observed_labels > 0).astype(int))

    print 'Binary contingency table is shown below:\n{0:s}'.format(
        str(binary_contingency_dict))
    print SEPARATOR_STRING

    binary_pod = model_eval.get_pod(binary_contingency_dict)
    binary_pofd = model_eval.get_pofd(binary_contingency_dict)
    binary_success_ratio = model_eval.get_success_ratio(
        binary_contingency_dict)
    binary_focn = model_eval.get_focn(binary_contingency_dict)
    binary_accuracy = model_eval.get_accuracy(binary_contingency_dict)
    binary_csi = model_eval.get_csi(binary_contingency_dict)
    binary_frequency_bias = model_eval.get_frequency_bias(
        binary_contingency_dict)

    print (
        'Binary POD = {0:.4f} ... POFD = {1:.4f} ... success ratio = {2:.4f} '
        '... FOCN = {3:.4f} ... accuracy = {4:.4f} ... CSI = {5:.4f} ... '
        'frequency bias = {6:.4f}\n'
    ).format(binary_pod, binary_pofd, binary_success_ratio, binary_focn,
             binary_accuracy, binary_csi, binary_frequency_bias)

    auc_by_class, sklearn_auc_by_class = _plot_roc_curves(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print '\n'

    aupd_by_class = _plot_performance_diagrams(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print '\n'

    reliability_by_class, bss_by_class = _plot_attributes_diagrams(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print '\n'

    evaluation_file_name = '{0:s}/model_evaluation.p'.format(output_dir_name)
    print 'Writing results to: "{0:s}"...\n'.format(evaluation_file_name)

    eval_utils.write_evaluation_results(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        binarization_threshold=binarization_threshold, accuracy=accuracy,
        peirce_score=peirce_score, heidke_score=heidke_score,
        gerrity_score=gerrity_score, binary_pod=binary_pod,
        binary_pofd=binary_pofd, binary_success_ratio=binary_success_ratio,
        binary_focn=binary_focn, binary_accuracy=binary_accuracy,
        binary_csi=binary_csi, binary_frequency_bias=binary_frequency_bias,
        auc_by_class=auc_by_class,
        scikit_learn_auc_by_class=sklearn_auc_by_class,
        aupd_by_class=aupd_by_class,
        reliability_by_class=reliability_by_class,
        bss_by_class=bss_by_class, pickle_file_name=evaluation_file_name)
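# Hedged usage sketch for the multiclass evaluator above.  The data are
# synthetic and the output-directory name is made up; the only real
# requirements, per the docstring, are that each row of
# class_probability_matrix sum to 1.0 and that labels lie in 0...(K - 1).
import numpy

num_examples = 1000
num_classes = 3
numpy.random.seed(6695)

class_probability_matrix = numpy.random.uniform(
    size=(num_examples, num_classes))
class_probability_matrix = class_probability_matrix / numpy.sum(
    class_probability_matrix, axis=1, keepdims=True)

observed_labels = numpy.random.randint(
    low=0, high=num_classes, size=num_examples)

run_evaluation(
    class_probability_matrix=class_probability_matrix,
    observed_labels=observed_labels,
    output_dir_name='multiclass_evaluation')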
def run_evaluation(forecast_probabilities, observed_labels, output_dir_name):
    """Evaluates forecast-observation pairs from any forecasting method.

    Specifically, this method does the following:

    - creates ROC (receiver operating characteristic) curve
    - creates performance diagram
    - creates attributes diagram
    - saves each of the aforelisted figures to a .jpg file
    - computes many performance metrics and saves them to a Pickle file

    :param forecast_probabilities: length-N numpy array of forecast event
        probabilities.
    :param observed_labels: length-N numpy array of observed labels (1 for
        "yes", 0 for "no").
    :param output_dir_name: Name of output directory.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # TODO(thunderhoser): Make binarization threshold an input argument to
    # this method.
    (binarization_threshold, best_csi
    ) = model_eval.find_best_binarization_threshold(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
        criterion_function=model_eval.get_csi,
        optimization_direction=model_eval.MAX_OPTIMIZATION_DIRECTION,
        unique_forecast_precision=FORECAST_PRECISION_FOR_THRESHOLDS)

    print (
        'Best binarization threshold = {0:.4f} ... corresponding CSI = '
        '{1:.4f}'
    ).format(binarization_threshold, best_csi)

    print 'Binarizing forecast probabilities...'
    forecast_labels = model_eval.binarize_forecast_probs(
        forecast_probabilities=forecast_probabilities,
        binarization_threshold=binarization_threshold)

    print 'Creating contingency table...'
    contingency_table_as_dict = model_eval.get_contingency_table(
        forecast_labels=forecast_labels, observed_labels=observed_labels)
    print '{0:s}\n'.format(str(contingency_table_as_dict))

    print 'Computing performance metrics...'
    pod = model_eval.get_pod(contingency_table_as_dict)
    pofd = model_eval.get_pofd(contingency_table_as_dict)
    success_ratio = model_eval.get_success_ratio(contingency_table_as_dict)
    focn = model_eval.get_focn(contingency_table_as_dict)
    accuracy = model_eval.get_accuracy(contingency_table_as_dict)
    csi = model_eval.get_csi(contingency_table_as_dict)
    frequency_bias = model_eval.get_frequency_bias(contingency_table_as_dict)
    peirce_score = model_eval.get_peirce_score(contingency_table_as_dict)
    heidke_score = model_eval.get_heidke_score(contingency_table_as_dict)

    print (
        'POD = {0:.4f} ... POFD = {1:.4f} ... success ratio = {2:.4f} ... '
        'FOCN = {3:.4f} ... accuracy = {4:.4f} ... CSI = {5:.4f} ... '
        'frequency bias = {6:.4f} ... Peirce score = {7:.4f} ... '
        'Heidke score = {8:.4f}\n'
    ).format(pod, pofd, success_ratio, focn, accuracy, csi, frequency_bias,
             peirce_score, heidke_score)

    auc, scikit_learn_auc = _create_roc_curve(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print '\n'

    bss_dict = _create_attributes_diagram(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print '\n'

    aupd = _create_performance_diagram(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print '\n'

    evaluation_file_name = '{0:s}/model_evaluation.p'.format(output_dir_name)
    print 'Writing results to: "{0:s}"...'.format(evaluation_file_name)

    model_eval.write_results(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        binarization_threshold=binarization_threshold, pod=pod, pofd=pofd,
        success_ratio=success_ratio, focn=focn, accuracy=accuracy, csi=csi,
        frequency_bias=frequency_bias, peirce_score=peirce_score,
        heidke_score=heidke_score, auc=auc,
        scikit_learn_auc=scikit_learn_auc, aupd=aupd, bss_dict=bss_dict,
        pickle_file_name=evaluation_file_name)
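# Hedged usage sketch for the binary evaluator above.  Synthetic
# probabilities and labels, and a made-up output directory; real callers
# would pass forecast-observation pairs from an actual forecasting method.
import numpy

num_pairs = 1000
numpy.random.seed(6695)

forecast_probabilities = numpy.random.uniform(size=num_pairs)
observed_labels = numpy.random.randint(low=0, high=2, size=num_pairs)

run_evaluation(
    forecast_probabilities=forecast_probabilities,
    observed_labels=observed_labels,
    output_dir_name='binary_evaluation')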