def load_results_from_folder(results_folder):
    """
    Given a base output folder, possibly containing results for multiple
    sub-groups, returns a dictionary of results, keyed by sub-group identifier.
    """

    results = dict()
    options = load_options(results_folder)
    for ix, sg in enumerate(options['sub_groups']):
        sg_id = sub_group_identifier(sg, ix)
        results_file_path = pjoin(results_folder, sg_id, cfg.file_name_results)
        if not pexists(results_file_path) \
                or os.path.getsize(results_file_path) <= 0:
            raise IOError('Results file for sub group {} does not exist'
                          ' or is empty!'.format(sg_id))
        results[sg_id] = load_results_dict(results_file_path)

    return results
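# A minimal usage sketch (not part of the original module): it assumes
# `results_folder` points to the output directory of a completed neuropredict
# run, i.e. one containing the saved options file and one results file per
# sub-group. The helper name `_demo_summarize_run` is hypothetical.
def _demo_summarize_run(results_folder):
    """Hypothetical example: list each sub-group and how many methods it evaluated."""
    all_results = load_results_from_folder(results_folder)
    for sg_id, res in all_results.items():
        print('{} : {} method(s) evaluated'
              ''.format(sg_id, len(res['method_names'])))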
def print_options(run_dir):
    """
    Prints options used in a previous run.

    Parameters
    ----------
    run_dir : str
        Path to a folder with options from a previous run stored in it.
    """

    from neuropredict.utils import load_options

    print('\n\nOptions used in the run\n{}\n'.format(run_dir))

    user_options = load_options(run_dir)
    for key, val in user_options.items():
        if key.lower() not in ('sample_ids', 'classes'):
            print('{:>25} : {}'.format(key, val))

    return
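# A minimal usage sketch (not part of the original module): print_options() only
# needs the path to a previous run's output folder. The loop and the helper name
# `_demo_compare_run_options` are hypothetical; the run directories are supplied
# by the caller.
def _demo_compare_run_options(run_dirs):
    """Hypothetical example: print the options of several previous runs in one go."""
    for run_dir in run_dirs:
        print_options(run_dir)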
def make_visualizations(results_file_path, out_dir, options_path=None):
    """
    Produces the performance visualizations/comparisons from the
    cross-validation results.

    Parameters
    ----------
    results_file_path : str
        Path to file containing results produced by `rhst`

    out_dir : str
        Path to a folder to store results.

    options_path : str, optional
        Path to the file containing options for this run; when given, it is
        used to decide whether the chosen classifier provides feature importances.
    """

    results_dict = rhst.load_results_dict(results_file_path)

    # using shorter names for readability
    accuracy_balanced = results_dict['accuracy_balanced']
    method_names = results_dict['method_names']
    num_classes = results_dict['num_classes']
    class_sizes = results_dict['class_sizes']
    confusion_matrix = results_dict['confusion_matrix']
    class_order = results_dict['class_set']
    feature_importances_rf = results_dict['feature_importances_rf']
    feature_names = results_dict['feature_names']
    num_times_misclfd = results_dict['num_times_misclfd']
    num_times_tested = results_dict['num_times_tested']

    feature_importances_available = True
    if options_path is not None:
        user_options = load_options(out_dir, options_path)
        if user_options['classifier_name'].lower() \
                not in cfg.clfs_with_feature_importance:
            feature_importances_available = False
    else:
        # check whether all the importance values are NaN
        unusable = [np.all(np.isnan(method_fi.flatten()))
                    for method_fi in feature_importances_rf]
        feature_importances_available = not np.all(unusable)

    try:
        balacc_fig_path = pjoin(out_dir, 'balanced_accuracy')
        visualize.metric_distribution(accuracy_balanced, method_names,
                                      balacc_fig_path, class_sizes,
                                      num_classes, "Balanced Accuracy")

        confmat_fig_path = pjoin(out_dir, 'confusion_matrix')
        visualize.confusion_matrices(confusion_matrix, class_order,
                                     method_names, confmat_fig_path)

        cmp_misclf_fig_path = pjoin(out_dir, 'compare_misclf_rates')
        if num_classes > 2:
            visualize.compare_misclf_pairwise(confusion_matrix, class_order,
                                              method_names, cmp_misclf_fig_path)
        elif num_classes == 2:
            visualize.compare_misclf_pairwise_parallel_coord_plot(
                    confusion_matrix, class_order, method_names,
                    cmp_misclf_fig_path)

        if feature_importances_available:
            featimp_fig_path = pjoin(out_dir, 'feature_importance')
            visualize.feature_importance_map(feature_importances_rf,
                                             method_names, featimp_fig_path,
                                             feature_names)
        else:
            print('\nCurrent predictive model does not provide feature '
                  'importance values. Skipping them.')

        misclf_out_path = pjoin(out_dir, 'misclassified_subjects')
        visualize.freq_hist_misclassifications(num_times_misclfd,
                                               num_times_tested, method_names,
                                               misclf_out_path)
    except:
        traceback.print_exc()
        warnings.warn('Error generating the visualizations! Skipping ..')

    # cleaning up
    plt.close('all')

    return
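# A minimal usage sketch (not part of the original module): it rebuilds the
# figures for every sub-group of a finished run, reusing the same per-sub-group
# folder layout assumed in load_results_from_folder(). The helper name
# `_demo_remake_figures` is hypothetical.
def _demo_remake_figures(results_folder):
    """Hypothetical example: regenerate the visualizations for each sub-group."""
    options = load_options(results_folder)
    for ix, sg in enumerate(options['sub_groups']):
        sg_id = sub_group_identifier(sg, ix)
        sg_out_dir = pjoin(results_folder, sg_id)
        results_file_path = pjoin(sg_out_dir, cfg.file_name_results)
        make_visualizations(results_file_path, sg_out_dir)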
def export_results(dict_to_save, out_dir, options_path):
    """
    Exports the results to a simpler CSV format for use in other packages.

    Parameters
    ----------
    dict_to_save : dict
        Dictionary containing all the relevant results.

    out_dir : str
        Path to save the results to.

    options_path : str
        Path to the file containing options for this run.

    Returns
    -------
    None
    """

    confusion_matrix = dict_to_save['confusion_matrix']
    accuracy_balanced = dict_to_save['accuracy_balanced']
    method_names = dict_to_save['method_names']
    feature_importances_rf = dict_to_save['feature_importances_rf']
    feature_names = dict_to_save['feature_names']
    num_times_misclfd = dict_to_save['num_times_misclfd']
    num_times_tested = dict_to_save['num_times_tested']

    num_rep_cv = confusion_matrix.shape[0]
    num_datasets = confusion_matrix.shape[3]
    num_classes = confusion_matrix.shape[2]

    # separating CSVs from the PDFs
    exp_dir = pjoin(out_dir, cfg.EXPORT_DIR_NAME)
    os.makedirs(exp_dir, exist_ok=True)

    # TODO think about how to export predictive probability per class per CV rep
    #   (pred_prob_per_class)

    user_options = load_options(out_dir, options_path)

    print_aligned_msg = lambda msg1, msg2: print(
            'Exporting {msg1:<40} .. {msg2}'.format(msg1=msg1, msg2=msg2))

    print('')
    try:
        # balanced accuracy distribution
        balacc_path = pjoin(exp_dir, 'balanced_accuracy.csv')
        np.savetxt(balacc_path, accuracy_balanced,
                   delimiter=cfg.DELIMITER,
                   fmt=cfg.EXPORT_FORMAT,
                   header=','.join(method_names))
        print_aligned_msg('accuracy distribution', 'Done.')

        # confusion matrices
        for mm in range(num_datasets):
            confmat_path = pjoin(exp_dir,
                                 'confusion_matrix_{}.csv'
                                 ''.format(method_names[mm]))
            reshaped_matrix = np.reshape(confusion_matrix[:, :, :, mm],
                                         [num_rep_cv, num_classes * num_classes])
            np.savetxt(confmat_path, reshaped_matrix,
                       delimiter=cfg.DELIMITER,
                       fmt=cfg.EXPORT_FORMAT,
                       header='shape of confusion matrix: '
                              'num_repetitions x num_classes^2')
        print_aligned_msg('confusion matrices', 'Done.')

        # misclassification rates
        avg_cfmat, misclf_rate = visualize.compute_pairwise_misclf(confusion_matrix)
        num_datasets = misclf_rate.shape[0]
        for mm in range(num_datasets):
            cmp_misclf_path = pjoin(exp_dir,
                                    'average_misclassification_rates_{}.csv'
                                    ''.format(method_names[mm]))
            np.savetxt(cmp_misclf_path, misclf_rate[mm, :],
                       fmt=cfg.EXPORT_FORMAT, delimiter=cfg.DELIMITER)
        print_aligned_msg('misclassification rates', 'Done.')

        # feature importance
        if user_options['classifier_name'].lower() \
                in cfg.clfs_with_feature_importance:
            for mm in range(num_datasets):
                featimp_path = pjoin(exp_dir,
                                     'feature_importance_{}.csv'
                                     ''.format(method_names[mm]))
                np.savetxt(featimp_path, feature_importances_rf[mm],
                           fmt=cfg.EXPORT_FORMAT,
                           delimiter=cfg.DELIMITER,
                           header=','.join(feature_names[mm]))
            print_aligned_msg('feature importance values', 'Done.')
        else:
            print_aligned_msg('feature importance values', 'Skipped.')
            print('\tCurrent predictive model does not provide them.')

        # subject-wise misclassification frequencies
        perc_misclsfd, _, _, _ = visualize.compute_perc_misclf_per_sample(
                num_times_misclfd, num_times_tested)
        for mm in range(num_datasets):
            subwise_misclf_path = pjoin(exp_dir,
                                        'subject_misclf_freq_{}.csv'
                                        ''.format(method_names[mm]))
            # TODO there must be a more elegant way to write a dict to CSV
            with open(subwise_misclf_path, 'w') as smf:
                for sid, val in perc_misclsfd[mm].items():
                    smf.write('{}{}{}\n'.format(sid, cfg.DELIMITER, val))
        print_aligned_msg('subject-wise misclf frequencies', 'Done.')

    except:
        traceback.print_exc()
        raise IOError('Unable to export the results to CSV files.')

    return
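# A minimal usage sketch (not part of the original module): export_results()
# expects the full results dictionary, so this loads it from disk first. The
# helper name `_demo_export_csvs` is hypothetical; the three paths are supplied
# by the caller, with `options_path` pointing to the saved options of that run.
def _demo_export_csvs(results_file_path, out_dir, options_path):
    """Hypothetical example: load a results file and export it to CSV."""
    results_dict = rhst.load_results_dict(results_file_path)
    export_results(results_dict, out_dir, options_path)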