import traceback
import warnings
from os.path import join as pjoin

import numpy as np
from matplotlib import pyplot as plt

# rhst, visualize, cfg, load_options and confusion_matrices are assumed to be
# imported from the surrounding package elsewhere in this module.


def _viz_confusion_matrices(self):
    """Confusion matrices for each feature set, as plots of misclassification rates"""

    # forcing a tuple to fix the dataset order, both in the compound array
    # and in the visualizations
    ds_id_order = tuple(self.datasets.modality_ids)
    num_datasets = len(ds_id_order)
    num_classes = len(self._target_set)

    # stack the per-repetition confusion matrices into one 4-D array
    conf_mat_all = np.empty(
        (self.num_rep_cv, num_classes, num_classes, num_datasets))
    for idx, ds in enumerate(ds_id_order):
        for run in range(self.num_rep_cv):
            conf_mat_all[run, :, :, idx] = \
                self.results.confusion_mat[(ds, run)]

    cm_out_fig_path = pjoin(self._fig_out_dir, 'confusion_matrix')
    confusion_matrices(conf_mat_all, self._target_set, ds_id_order,
                       cm_out_fig_path)

    self._compare_misclf_rate(conf_mat_all, ds_id_order, num_classes)
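# Note on conf_mat_all built above: it is indexed as
# [repetition, row, column, dataset], so conf_mat_all[run, :, :, idx] holds
# the complete confusion matrix from one CV repetition on one feature set.
# By the usual convention (e.g., sklearn.metrics.confusion_matrix), rows
# index the true class and columns the predicted class; whether
# self.results.confusion_mat follows that convention is an assumption here,
# not something this snippet enforces.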
def make_visualizations(results_file_path, out_dir, options_path=None):
    """
    Produces the performance visualizations/comparisons
    from the cross-validation results.

    Parameters
    ----------
    results_file_path : str
        Path to file containing results produced by `rhst`

    out_dir : str
        Path to a folder to store results.

    options_path : str, optional
        Path to the user options file. When given, it is used to decide
        whether feature importance values are available for the chosen
        classifier.
    """

    results_dict = rhst.load_results_dict(results_file_path)

    # shorter names for readability
    accuracy_balanced = results_dict['accuracy_balanced']
    method_names = results_dict['method_names']
    num_classes = results_dict['num_classes']
    class_sizes = results_dict['class_sizes']
    confusion_matrix = results_dict['confusion_matrix']
    class_order = results_dict['class_set']
    feature_importances_rf = results_dict['feature_importances_rf']
    feature_names = results_dict['feature_names']
    num_times_misclfd = results_dict['num_times_misclfd']
    num_times_tested = results_dict['num_times_tested']

    feature_importances_available = True
    if options_path is not None:
        user_options = load_options(out_dir, options_path)
        if user_options['classifier_name'].lower() \
                not in cfg.clfs_with_feature_importance:
            feature_importances_available = False
    else:
        # check whether all the importance values are NaN
        unusable = [np.all(np.isnan(method_fi.flatten()))
                    for method_fi in feature_importances_rf]
        feature_importances_available = not np.all(unusable)

    try:
        balacc_fig_path = pjoin(out_dir, 'balanced_accuracy')
        visualize.metric_distribution(accuracy_balanced, method_names,
                                      balacc_fig_path, class_sizes,
                                      num_classes, "Balanced Accuracy")

        confmat_fig_path = pjoin(out_dir, 'confusion_matrix')
        visualize.confusion_matrices(confusion_matrix, class_order,
                                     method_names, confmat_fig_path)

        cmp_misclf_fig_path = pjoin(out_dir, 'compare_misclf_rates')
        if num_classes > 2:
            visualize.compare_misclf_pairwise(confusion_matrix, class_order,
                                              method_names,
                                              cmp_misclf_fig_path)
        elif num_classes == 2:
            visualize.compare_misclf_pairwise_parallel_coord_plot(
                confusion_matrix, class_order, method_names,
                cmp_misclf_fig_path)

        if feature_importances_available:
            featimp_fig_path = pjoin(out_dir, 'feature_importance')
            visualize.feature_importance_map(feature_importances_rf,
                                             method_names, featimp_fig_path,
                                             feature_names)
        else:
            print('\nCurrent predictive model does not provide '
                  'feature importance values. Skipping them.')

        misclf_out_path = pjoin(out_dir, 'misclassified_subjects')
        visualize.freq_hist_misclassifications(num_times_misclfd,
                                               num_times_tested,
                                               method_names, misclf_out_path)
    except Exception:
        traceback.print_exc()
        warnings.warn('Error generating the visualizations! Skipping ..')

    # cleaning up
    plt.close('all')

    return
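# --- Usage sketch (illustrative only; the paths below are hypothetical) ---
# Assuming the cross-validation driver in `rhst` has already written a
# results file, the figures can be regenerated offline like so:
#
#   make_visualizations(results_file_path='/tmp/neuropredict/rhst_results.pkl',
#                       out_dir='/tmp/neuropredict/figures',
#                       options_path=None)
#
# With options_path=None, the availability of feature importances is inferred
# by checking whether all stored importance values are NaN; pass a valid
# options file to decide based on the chosen classifier instead.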