コード例 #1
0
    def test_get_frequency_bias(self):
        """Ensures correctness of get_frequency_bias with non-zero inputs.

        The frequency bias computed from `CONTINGENCY_TABLE_THRESHOLD_HALF`
        must match `FREQUENCY_BIAS_THRESHOLD_HALF` to within `TOLERANCE`
        (absolute).
        """

        actual_bias = model_eval.get_frequency_bias(
            CONTINGENCY_TABLE_THRESHOLD_HALF)

        # Floating-point comparison, so allow an absolute tolerance rather
        # than requiring exact equality.
        matches_expected = numpy.isclose(
            actual_bias, FREQUENCY_BIAS_THRESHOLD_HALF, atol=TOLERANCE)
        self.assertTrue(matches_expected)
コード例 #2
0
    def test_get_frequency_bias_all_zeros(self):
        """Ensures correctness of get_frequency_bias with all-zero inputs.

        For `CONTINGENCY_TABLE_ALL_ZEROS`, the expected result is NaN.
        """

        actual_bias = model_eval.get_frequency_bias(
            CONTINGENCY_TABLE_ALL_ZEROS)

        bias_is_nan = numpy.isnan(actual_bias)
        self.assertTrue(bias_is_nan)
コード例 #3
0
def run_evaluation(class_probability_matrix, observed_labels, output_dir_name):
    """Evaluates a set of multiclass probabilistic predictions.

    Specifically, this method does the following:

    - creates the output directory if necessary
    - finds the binarization threshold that maximizes the Gerrity score
    - converts probabilities to deterministic labels with that threshold
    - computes multiclass scores (accuracy, Peirce, Heidke, Gerrity) from the
      multiclass contingency table
    - computes binary scores (POD, POFD, success ratio, FOCN, accuracy, CSI,
      frequency bias), where any label > 0 is treated as "yes" and label 0 as
      "no"
    - plots ROC curves, performance diagrams, and attributes diagrams
    - writes all results to "<output_dir_name>/model_evaluation.p"

    E = number of examples
    K = number of classes

    :param class_probability_matrix: E-by-K numpy array, where
        class_probability_matrix[i, k] = probability that the [i]th example
        belongs to the [k]th class.  Classes should be mutually exclusive and
        collectively exhaustive, so that the sum across each row is 1.0.
    :param observed_labels: length-E numpy array of observed labels.  Each label
        must be an integer from 0...(K - 1).
    :param output_dir_name: Name of output directory.  Results will be saved
        here.
    """

    # NOTE: every message is printed with a single-argument `print(...)` call
    # and `.format` is applied to the string itself.  This produces identical
    # output under Python 2 and Python 3, whereas `print(...).format(...)`
    # only works when `print` is a statement.

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Finding best binarization threshold (front vs. no front)...')

    binarization_threshold, best_gerrity_score = (
        eval_utils.find_best_binarization_threshold(
            class_probability_matrix=class_probability_matrix,
            observed_labels=observed_labels,
            threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
            criterion_function=eval_utils.get_gerrity_score,
            optimization_direction=eval_utils.MAX_OPTIMIZATION_DIRECTION,
            forecast_precision_for_thresholds=FORECAST_PRECISION_FOR_THRESHOLDS
        ))

    print((
        'Best binarization threshold = {0:.4f} ... corresponding Gerrity score '
        '= {1:.4f}'
    ).format(binarization_threshold, best_gerrity_score))

    print('Determinizing multiclass probabilities...')
    predicted_labels = eval_utils.determinize_probabilities(
        class_probability_matrix=class_probability_matrix,
        binarization_threshold=binarization_threshold)

    # Number of classes (K) is the number of columns in the probability
    # matrix.
    contingency_matrix = eval_utils.get_contingency_table(
        predicted_labels=predicted_labels,
        observed_labels=observed_labels,
        num_classes=class_probability_matrix.shape[1])

    print('Multiclass contingency table is shown below:\n{0:s}'.format(
        str(contingency_matrix)))
    print(SEPARATOR_STRING)

    accuracy = eval_utils.get_accuracy(contingency_matrix)
    peirce_score = eval_utils.get_peirce_score(contingency_matrix)
    heidke_score = eval_utils.get_heidke_score(contingency_matrix)
    gerrity_score = eval_utils.get_gerrity_score(contingency_matrix)

    print((
        'Multiclass accuracy = {0:.4f} ... Peirce score = {1:.4f} ... '
        'Heidke score = {2:.4f} ... Gerrity score = {3:.4f}\n'
    ).format(accuracy, peirce_score, heidke_score, gerrity_score))

    # Collapse the K-class problem to two classes: label 0 vs. any label > 0.
    binary_contingency_dict = model_eval.get_contingency_table(
        forecast_labels=(predicted_labels > 0).astype(int),
        observed_labels=(observed_labels > 0).astype(int))

    print('Binary contingency table is shown below:\n{0:s}'.format(
        str(binary_contingency_dict)))
    print(SEPARATOR_STRING)

    binary_pod = model_eval.get_pod(binary_contingency_dict)
    binary_pofd = model_eval.get_pofd(binary_contingency_dict)
    binary_success_ratio = model_eval.get_success_ratio(
        binary_contingency_dict)
    binary_focn = model_eval.get_focn(binary_contingency_dict)
    binary_accuracy = model_eval.get_accuracy(binary_contingency_dict)
    binary_csi = model_eval.get_csi(binary_contingency_dict)
    binary_frequency_bias = model_eval.get_frequency_bias(
        binary_contingency_dict)

    print((
        'Binary POD = {0:.4f} ... POFD = {1:.4f} ... success ratio = {2:.4f} '
        '... FOCN = {3:.4f} ... accuracy = {4:.4f} ... CSI = {5:.4f} ... '
        'frequency bias = {6:.4f}\n'
    ).format(
        binary_pod, binary_pofd, binary_success_ratio, binary_focn,
        binary_accuracy, binary_csi, binary_frequency_bias))

    auc_by_class, sklearn_auc_by_class = _plot_roc_curves(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        output_dir_name=output_dir_name)
    print('\n')

    aupd_by_class = _plot_performance_diagrams(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        output_dir_name=output_dir_name)
    print('\n')

    reliability_by_class, bss_by_class = _plot_attributes_diagrams(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        output_dir_name=output_dir_name)
    print('\n')

    evaluation_file_name = '{0:s}/model_evaluation.p'.format(output_dir_name)
    print('Writing results to: "{0:s}"...\n'.format(evaluation_file_name))

    eval_utils.write_evaluation_results(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        binarization_threshold=binarization_threshold,
        accuracy=accuracy,
        peirce_score=peirce_score,
        heidke_score=heidke_score,
        gerrity_score=gerrity_score,
        binary_pod=binary_pod,
        binary_pofd=binary_pofd,
        binary_success_ratio=binary_success_ratio,
        binary_focn=binary_focn,
        binary_accuracy=binary_accuracy,
        binary_csi=binary_csi,
        binary_frequency_bias=binary_frequency_bias,
        auc_by_class=auc_by_class,
        scikit_learn_auc_by_class=sklearn_auc_by_class,
        aupd_by_class=aupd_by_class,
        reliability_by_class=reliability_by_class,
        bss_by_class=bss_by_class,
        pickle_file_name=evaluation_file_name)
コード例 #4
0
def run_evaluation(forecast_probabilities, observed_labels, output_dir_name):
    """Evaluates forecast-observation pairs from any forecasting method.

    Specifically, this method does the following:

    - finds the binarization threshold that maximizes CSI
    - binarizes the forecast probabilities with that threshold
    - creates ROC (receiver operating characteristic) curve
    - creates performance diagram
    - creates attributes diagram
    - saves each of the aforelisted figures to a .jpg file
    - computes many performance metrics and saves them to a Pickle file
      ("<output_dir_name>/model_evaluation.p")

    :param forecast_probabilities: length-N numpy array of forecast event
        probabilities.
    :param observed_labels: length-N numpy array of observed labels (1 for
        "yes", 0 for "no").
    :param output_dir_name: Name of output directory.
    """

    # NOTE: every message is printed with a single-argument `print(...)` call
    # and `.format` is applied to the string itself.  This produces identical
    # output under Python 2 and Python 3, whereas `print (...).format(...)`
    # only works when `print` is a statement.

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # TODO(thunderhoser): Make binarization threshold an input argument to this
    # method.
    binarization_threshold, best_csi = (
        model_eval.find_best_binarization_threshold(
            forecast_probabilities=forecast_probabilities,
            observed_labels=observed_labels,
            threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
            criterion_function=model_eval.get_csi,
            optimization_direction=model_eval.MAX_OPTIMIZATION_DIRECTION,
            unique_forecast_precision=FORECAST_PRECISION_FOR_THRESHOLDS))

    print((
        'Best binarization threshold = {0:.4f} ... corresponding CSI = {1:.4f}'
    ).format(binarization_threshold, best_csi))

    print('Binarizing forecast probabilities...')
    forecast_labels = model_eval.binarize_forecast_probs(
        forecast_probabilities=forecast_probabilities,
        binarization_threshold=binarization_threshold)

    print('Creating contingency table...')
    contingency_table_as_dict = model_eval.get_contingency_table(
        forecast_labels=forecast_labels, observed_labels=observed_labels)
    print('{0:s}\n'.format(str(contingency_table_as_dict)))

    print('Computing performance metrics...')
    pod = model_eval.get_pod(contingency_table_as_dict)
    pofd = model_eval.get_pofd(contingency_table_as_dict)
    success_ratio = model_eval.get_success_ratio(contingency_table_as_dict)
    focn = model_eval.get_focn(contingency_table_as_dict)
    accuracy = model_eval.get_accuracy(contingency_table_as_dict)
    csi = model_eval.get_csi(contingency_table_as_dict)
    frequency_bias = model_eval.get_frequency_bias(contingency_table_as_dict)
    peirce_score = model_eval.get_peirce_score(contingency_table_as_dict)
    heidke_score = model_eval.get_heidke_score(contingency_table_as_dict)

    print((
        'POD = {0:.4f} ... POFD = {1:.4f} ... success ratio = {2:.4f} ... '
        'FOCN = {3:.4f} ... accuracy = {4:.4f} ... CSI = {5:.4f} ... frequency '
        'bias = {6:.4f} ... Peirce score = {7:.4f} ... Heidke score = {8:.4f}\n'
    ).format(
        pod, pofd, success_ratio, focn, accuracy, csi, frequency_bias,
        peirce_score, heidke_score))

    auc, scikit_learn_auc = _create_roc_curve(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print('\n')

    bss_dict = _create_attributes_diagram(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print('\n')

    aupd = _create_performance_diagram(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
    print('\n')

    evaluation_file_name = '{0:s}/model_evaluation.p'.format(output_dir_name)
    print('Writing results to: "{0:s}"...'.format(evaluation_file_name))
    model_eval.write_results(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        binarization_threshold=binarization_threshold, pod=pod, pofd=pofd,
        success_ratio=success_ratio, focn=focn, accuracy=accuracy, csi=csi,
        frequency_bias=frequency_bias, peirce_score=peirce_score,
        heidke_score=heidke_score, auc=auc, scikit_learn_auc=scikit_learn_auc,
        aupd=aupd, bss_dict=bss_dict, pickle_file_name=evaluation_file_name)