Example #1
def load_results_from_folder(results_folder):
    """

    Given a base output folder, possibly containing results for multiple sub-groups,
        returns a dictionary of results, keyed in by sub group identifier.

    """

    results = dict()
    options = load_options(results_folder)
    for ix, sg in enumerate(options['sub_groups']):
        sg_id = sub_group_identifier(sg, ix)
        results_file_path = pjoin(results_folder, sg_id, cfg.file_name_results)
        if not pexists(results_file_path) or \
                os.path.getsize(results_file_path) <= 0:
            raise IOError('Results file for sub group {} does not exist'
                          ' or is empty!'.format(sg_id))
        results[sg_id] = load_results_dict(results_file_path)

    return results
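A minimal usage sketch, assuming the function above and its neuropredict helpers (load_options, sub_group_identifier, load_results_dict) are importable; the folder path is a placeholder:

# Hypothetical usage: the output folder path is a placeholder, and the
# function above is assumed to be importable along with its helpers.
out_folder = '/path/to/neuropredict_output'
results_by_subgroup = load_results_from_folder(out_folder)
for sg_id, sg_results in results_by_subgroup.items():
    print('sub group {} : {} result entries'.format(sg_id, len(sg_results)))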
Example #2
def print_options(run_dir):
    """
    Prints options used in a previous run.

    Parameters
    ----------
    run_dir : str
        Path to the folder where options from a previous run are stored.

    """

    from neuropredict.utils import load_options

    print('\n\nOptions used in the run\n{}\n'.format(run_dir))
    user_options = load_options(run_dir)
    # print(user_options)
    for key, val in user_options.items():
        if key.lower() not in ('sample_ids', 'classes'):
            print('{:>25} : {}'.format(key, val))

    return
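A minimal usage sketch; the run directory below is a placeholder for the output folder of a previously completed neuropredict run:

# Hypothetical usage: run_dir is a placeholder pointing to a previous run.
run_dir = '/path/to/previous_neuropredict_run'
print_options(run_dir)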
Example #3
def make_visualizations(results_file_path, out_dir, options_path=None):
    """
    Produces the performance visualizations/comparisons from the cross-validation results.

    Parameters
    ----------
    results_file_path : str
        Path to file containing results produced by `rhst`

    out_dir : str
        Path to a folder to store the visualizations in.

    options_path : str, optional
        Path to the file containing options used in the original run; used to
        check whether the chosen classifier provides feature importance values.

    """

    results_dict = rhst.load_results_dict(results_file_path)

    # using shorter names for readability
    accuracy_balanced = results_dict['accuracy_balanced']
    method_names = results_dict['method_names']
    num_classes = results_dict['num_classes']
    class_sizes = results_dict['class_sizes']
    confusion_matrix = results_dict['confusion_matrix']
    class_order = results_dict['class_set']
    feature_importances_rf = results_dict['feature_importances_rf']
    feature_names = results_dict['feature_names']
    num_times_misclfd = results_dict['num_times_misclfd']
    num_times_tested = results_dict['num_times_tested']

    feature_importances_available = True
    if options_path is not None:
        user_options = load_options(out_dir, options_path)
        if user_options['classifier_name'].lower() not in \
                cfg.clfs_with_feature_importance:
            feature_importances_available = False
    else:
        # check whether all the values are NaN
        unusable = [
            np.all(np.isnan(method_fi.flatten()))
            for method_fi in feature_importances_rf
        ]
        feature_importances_available = not np.all(unusable)

    try:

        balacc_fig_path = pjoin(out_dir, 'balanced_accuracy')
        visualize.metric_distribution(accuracy_balanced, method_names,
                                      balacc_fig_path, class_sizes,
                                      num_classes, "Balanced Accuracy")

        confmat_fig_path = pjoin(out_dir, 'confusion_matrix')
        visualize.confusion_matrices(confusion_matrix, class_order,
                                     method_names, confmat_fig_path)

        cmp_misclf_fig_path = pjoin(out_dir, 'compare_misclf_rates')
        if num_classes > 2:
            visualize.compare_misclf_pairwise(confusion_matrix, class_order,
                                              method_names,
                                              cmp_misclf_fig_path)
        elif num_classes == 2:
            visualize.compare_misclf_pairwise_parallel_coord_plot(
                confusion_matrix, class_order, method_names,
                cmp_misclf_fig_path)

        if feature_importances_available:
            featimp_fig_path = pjoin(out_dir, 'feature_importance')
            visualize.feature_importance_map(feature_importances_rf,
                                             method_names, featimp_fig_path,
                                             feature_names)
        else:
            print('\nCurrent predictive model does not provide feature '
                  'importance values. Skipping them.')

        misclf_out_path = pjoin(out_dir, 'misclassified_subjects')
        visualize.freq_hist_misclassifications(num_times_misclfd,
                                               num_times_tested, method_names,
                                               misclf_out_path)
    except Exception:
        traceback.print_exc()
        warnings.warn('Error generating the visualizations! Skipping ..')

    # cleaning up
    plt.close('all')

    return
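A minimal usage sketch with placeholder paths; with options_path=None, availability of feature importances is inferred from the stored values (all-NaN arrays are treated as unavailable):

# Hypothetical usage: both paths are placeholders.
results_file = '/path/to/rhst_results_file'
viz_out_dir = '/path/to/visualization_output'
make_visualizations(results_file, viz_out_dir, options_path=None)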
Example #4
def export_results(dict_to_save, out_dir, options_path):
    """
    Exports the results to a simpler CSV format for use in other packages.

    Parameters
    ----------
    dict_to_save : dict
        Dictionary containing all the relevant results.

    out_dir : str
        Path to save the results to.

    options_path : str
        Path to the file containing options used in the original run; used to
        check whether the chosen classifier provides feature importance values.

    Returns
    -------
    None

    """

    confusion_matrix = dict_to_save['confusion_matrix']
    accuracy_balanced = dict_to_save['accuracy_balanced']
    method_names = dict_to_save['method_names']
    feature_importances_rf = dict_to_save['feature_importances_rf']
    feature_names = dict_to_save['feature_names']
    num_times_misclfd = dict_to_save['num_times_misclfd']
    num_times_tested = dict_to_save['num_times_tested']

    num_rep_cv = confusion_matrix.shape[0]
    num_datasets = confusion_matrix.shape[3]
    num_classes = confusion_matrix.shape[2]

    # separating CSVs from the PDFs
    exp_dir = pjoin(out_dir, cfg.EXPORT_DIR_NAME)
    os.makedirs(exp_dir, exist_ok=True)

    # TODO think about how to export predictive probability per class per CV rep
    # pred_prob_per_class

    user_options = load_options(out_dir, options_path)
    def print_aligned_msg(msg1, msg2):
        print('Exporting {:<40} .. {}'.format(msg1, msg2))

    print('')
    try:
        # accuracy
        balacc_path = pjoin(exp_dir, 'balanced_accuracy.csv')
        np.savetxt(balacc_path,
                   accuracy_balanced,
                   delimiter=cfg.DELIMITER,
                   fmt=cfg.EXPORT_FORMAT,
                   header=','.join(method_names))
        print_aligned_msg('accuracy distribution', 'Done.')

        # conf mat
        for mm in range(num_datasets):
            confmat_path = pjoin(
                exp_dir, 'confusion_matrix_{}.csv'.format(method_names[mm]))
            reshaped_matrix = np.reshape(
                confusion_matrix[:, :, :, mm],
                [num_rep_cv, num_classes * num_classes])
            # each row is one CV repetition's confusion matrix, flattened to
            # num_classes^2 values; record that shape in the CSV header
            np.savetxt(confmat_path,
                       reshaped_matrix,
                       delimiter=cfg.DELIMITER,
                       fmt=cfg.EXPORT_FORMAT,
                       header='shape of confusion matrix: '
                              'num_repetitions x num_classes^2')
        print_aligned_msg('confusion matrices', 'Done.')

        # misclassification rates
        avg_cfmat, misclf_rate = visualize.compute_pairwise_misclf(
            confusion_matrix)
        num_datasets = misclf_rate.shape[0]
        for mm in range(num_datasets):
            cmp_misclf_path = pjoin(
                exp_dir, 'average_misclassification_rates_{}.csv'.format(
                    method_names[mm]))
            np.savetxt(cmp_misclf_path,
                       misclf_rate[mm, :],
                       fmt=cfg.EXPORT_FORMAT,
                       delimiter=cfg.DELIMITER)
        print_aligned_msg('misclassification rates', 'Done.')

        # feature importance
        if user_options['classifier_name'].lower() in \
                cfg.clfs_with_feature_importance:
            for mm in range(num_datasets):
                featimp_path = pjoin(
                    exp_dir,
                    'feature_importance_{}.csv'.format(method_names[mm]))
                np.savetxt(featimp_path,
                           feature_importances_rf[mm],
                           fmt=cfg.EXPORT_FORMAT,
                           delimiter=cfg.DELIMITER,
                           header=','.join(feature_names[mm]))
            print_aligned_msg('feature importance values', 'Done.')

        else:
            print_aligned_msg('feature importance values', 'Skipped.')
            print('\tCurrent predictive model does not provide them.')

        # subject-wise misclf frequencies
        perc_misclsfd, _, _, _ = visualize.compute_perc_misclf_per_sample(
            num_times_misclfd, num_times_tested)
        for mm in range(num_datasets):
            subwise_misclf_path = pjoin(
                exp_dir, 'subject_misclf_freq_{}.csv'.format(method_names[mm]))
            # TODO there must be a more elegant way to write dict to CSV
            with open(subwise_misclf_path, 'w') as smf:
                for sid, val in perc_misclsfd[mm].items():
                    smf.write('{}{}{}\n'.format(sid, cfg.DELIMITER, val))
        print_aligned_msg('subject-wise misclf frequencies', 'Done.')

    except Exception:
        traceback.print_exc()
        raise IOError('Unable to export the results to CSV files.')

    return
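A minimal usage sketch; all paths are placeholders, and rhst refers to the same neuropredict results module used in the examples above:

# Hypothetical usage: paths are placeholders; rhst is assumed to be importable
# as in the earlier examples.
results_dict = rhst.load_results_dict('/path/to/rhst_results_file')
export_results(results_dict, '/path/to/output_dir',
               options_path='/path/to/options_file')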