Example #1
    def _viz_confusion_matrices(self):
        """Confusion matrices for each feature set, as plots of misclf rate"""

        # force a tuple to ensure a consistent order in the compound array and in the visualizations
        ds_id_order = tuple(self.datasets.modality_ids)
        num_datasets = len(ds_id_order)
        num_classes = len(self._target_set)
        conf_mat_all = np.empty(
            (self.num_rep_cv, num_classes, num_classes, num_datasets))
        for idx, ds in enumerate(ds_id_order):
            for run in range(self.num_rep_cv):
                conf_mat_all[run, :, :, idx] = self.results.confusion_mat[(ds, run)]

        cm_out_fig_path = pjoin(self._fig_out_dir, 'confusion_matrix')
        confusion_matrices(conf_mat_all, self._target_set, ds_id_order,
                           cm_out_fig_path)

        self._compare_misclf_rate(conf_mat_all, ds_id_order, num_classes)
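
The core of this method is packing the per-(feature set, run) confusion matrices into a single 4D array before handing it to `confusion_matrices`. Below is a minimal standalone sketch of that layout; the modality and class names are hypothetical and dummy counts stand in for the object's attributes:

import numpy as np

# hypothetical setup: 2 feature sets (modalities), 3 classes, 5 CV repetitions
ds_id_order = ('modality_A', 'modality_B')
target_set = ('class1', 'class2', 'class3')
num_rep_cv = 5
num_classes = len(target_set)

# stand-in for self.results.confusion_mat: one matrix per (dataset, run) pair
rng = np.random.default_rng(0)
confusion_mat = {(ds, run): rng.integers(0, 10, size=(num_classes, num_classes))
                 for ds in ds_id_order for run in range(num_rep_cv)}

# stack into the layout the plotting routines expect:
# (num_rep_cv, num_classes, num_classes, num_datasets)
conf_mat_all = np.empty((num_rep_cv, num_classes, num_classes, len(ds_id_order)))
for idx, ds in enumerate(ds_id_order):
    for run in range(num_rep_cv):
        conf_mat_all[run, :, :, idx] = confusion_mat[(ds, run)]

print(conf_mat_all.shape)  # (5, 3, 3, 2)
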
Example #2
def make_visualizations(results_file_path, out_dir, options_path=None):
    """
    Produces the performance visualizations/comparisons from the cross-validation results.

    Parameters
    ----------
    results_file_path : str
        Path to the file containing results produced by `rhst`.

    out_dir : str
        Path to a folder to store results.

    options_path : str, optional
        Path to the file containing user-chosen options; used to determine
        whether the selected classifier provides feature importance values.

    """

    results_dict = rhst.load_results_dict(results_file_path)

    # using shorter names for readability
    accuracy_balanced = results_dict['accuracy_balanced']
    method_names = results_dict['method_names']
    num_classes = results_dict['num_classes']
    class_sizes = results_dict['class_sizes']
    confusion_matrix = results_dict['confusion_matrix']
    class_order = results_dict['class_set']
    feature_importances_rf = results_dict['feature_importances_rf']
    feature_names = results_dict['feature_names']
    num_times_misclfd = results_dict['num_times_misclfd']
    num_times_tested = results_dict['num_times_tested']

    feature_importances_available = True
    if options_path is not None:
        user_options = load_options(out_dir, options_path)
        clf_name = user_options['classifier_name'].lower()
        if clf_name not in cfg.clfs_with_feature_importance:
            feature_importances_available = False
    else:
        # check whether all feature importance values are NaN
        unusable = [
            np.all(np.isnan(method_fi.flatten()))
            for method_fi in feature_importances_rf
        ]
        feature_importances_available = not np.all(unusable)

    try:

        balacc_fig_path = pjoin(out_dir, 'balanced_accuracy')
        visualize.metric_distribution(accuracy_balanced, method_names,
                                      balacc_fig_path, class_sizes,
                                      num_classes, "Balanced Accuracy")

        confmat_fig_path = pjoin(out_dir, 'confusion_matrix')
        visualize.confusion_matrices(confusion_matrix, class_order,
                                     method_names, confmat_fig_path)

        cmp_misclf_fig_path = pjoin(out_dir, 'compare_misclf_rates')
        if num_classes > 2:
            visualize.compare_misclf_pairwise(confusion_matrix, class_order,
                                              method_names,
                                              cmp_misclf_fig_path)
        elif num_classes == 2:
            visualize.compare_misclf_pairwise_parallel_coord_plot(
                confusion_matrix, class_order, method_names,
                cmp_misclf_fig_path)

        if feature_importances_available:
            featimp_fig_path = pjoin(out_dir, 'feature_importance')
            visualize.feature_importance_map(feature_importances_rf,
                                             method_names, featimp_fig_path,
                                             feature_names)
        else:
            print('\nCurrent predictive model does not provide '
                  'feature importance values. Skipping them.')

        misclf_out_path = pjoin(out_dir, 'misclassified_subjects')
        visualize.freq_hist_misclassifications(num_times_misclfd,
                                               num_times_tested, method_names,
                                               misclf_out_path)
    except Exception:
        traceback.print_exc()
        warnings.warn('Error generating the visualizations! Skipping ..')

    # cleaning up
    plt.close('all')

    return
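
For reference, a minimal usage sketch: the paths below are hypothetical, and the results file is assumed to come from an earlier `rhst` run.

from os.path import join as pjoin

# hypothetical locations; the results file must be one produced by rhst
results_file_path = pjoin('/tmp/neuropredict_out', 'rhst_results.pkl')
out_dir = pjoin('/tmp/neuropredict_out', 'visualizations')

# without options_path, availability of feature importances is inferred
# from whether all stored importance values are NaN
make_visualizations(results_file_path, out_dir)
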