def execute(self) -> None:
    from segmenter.aggregators import Aggregators
    from segmenter.config import config_from_dir

    super(EvaluateTask, self).execute()
    job_configs = [
        d for d in os.listdir(self.output_dir)
        if os.path.isdir(os.path.join(self.output_dir, d))
    ]
    if self.args["classes"] is not None:
        self.classes = list(
            filter(lambda c: c in self.args["classes"], self.classes))
    for job_dir in job_configs:
        job_config, job_hash = config_from_dir(
            os.path.join(self.output_dir, job_dir))
        folds = ["all"] if job_config["FOLDS"] == 0 else [
            "fold{}".format(o) for o in range(job_config["FOLDS"])
        ]
        if job_config["BOOST_FOLDS"] > 0:
            boost_folds = [
                "b{}".format(o) for o in range(job_config["BOOST_FOLDS"] + 1)
            ]
            # Pair every fold with every boost fold, e.g. "fold0b0", "fold0b1".
            folds = [
                "".join(o) for o in itertools.product(folds, boost_folds)
            ]
        if self.args["folds"] is not None:
            folds = list(filter(lambda c: c in self.args["folds"], folds))
        if job_config["SEARCH"]:
            folds = ["fold0"]
        if len(folds) <= 1:
            aggregators = ["dummy"]
        else:
            aggregators = Aggregators.choices()
        if self.args["aggregators"] is not None:
            aggregators = list(
                filter(lambda c: c in self.args["aggregators"], aggregators))
        for clazz in self.classes:
            self.evaluator(clazz,
                           job_config,
                           job_hash,
                           self.data_dir,
                           self.output_dir,
                           self.weight_finder,
                           folds=folds,
                           aggregators=aggregators).execute()
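
# Illustrative sketch, not part of the task above: how boosted fold names are
# composed. With FOLDS=2 and BOOST_FOLDS=1 the product below yields
# ["fold0b0", "fold0b1", "fold1b0", "fold1b1"]. The function name is
# hypothetical and the snippet only assumes the module-level itertools import.
def _example_boosted_fold_names(folds_count=2, boost_folds_count=1):
    folds = ["fold{}".format(o) for o in range(folds_count)]
    boost_folds = ["b{}".format(o) for o in range(boost_folds_count + 1)]
    return ["".join(o) for o in itertools.product(folds, boost_folds)]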
class CombinedAUCVisualizer(BaseVisualizer):

    aggregator_map = dict(
        [("dummy", "Dummy")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    dataset_combined_visualizer = True

    def execute_result(self, result):
        result_data = pd.read_csv(result)
        job_hash = result.split("/")[-4]
        dataset = result.split("/")[-5]
        clazz = result.split("/")[-3]
        result_data["false_positive_rate"] = 1 - result_data["specificity"]
        result_data["false_discovery_rate"] = 1 - result_data["precision"]
        result_data = result_data.round(2)
        result_data = result_data[[
            "recall", "false_positive_rate", "false_discovery_rate",
            "aggregator"
        ]]
        result_data["display_aggregator"] = result_data["aggregator"].apply(
            lambda x: self.aggregator_map[x])
        result_data = result_data.drop("aggregator", axis=1)
        result_data["label"] = self.label_map[job_hash]
        result_data["dataset"] = dataset
        result_data["class"] = clazz
        return result_data

    def execute(self):
        df = None
        results = sorted(self.collect_results(self.data_dir))
        for result in mapper(self.execute_result, results):
            if df is None:
                df = result
            else:
                df = df.append(result)
        baseline_results = df[df["label"] == "Baseline"]
        other_results = df[df["label"] != "Baseline"]
        groups = other_results[[
            "display_aggregator", "label", "dataset", "class"
        ]].drop_duplicates()
        for group in groups.iterrows():
            group = group[1]
            clazz = group["class"]
            aggregator = group["display_aggregator"]
            dataset = group["dataset"]
            label = group["label"]
            group_baseline_results = baseline_results[
                (baseline_results["dataset"] == dataset)
                & (baseline_results["class"] == clazz)]
            group_baseline_results = group_baseline_results[[
                "recall", "false_discovery_rate"
            ]]
            group_baseline_results = group_baseline_results.groupby(
                "recall").agg({
                    "false_discovery_rate": 'min'
                }).reset_index()
            group_baseline_results = group_baseline_results.sort_values(
                "recall")
            group_results = other_results[
                (other_results["display_aggregator"] == aggregator)
                & (other_results["label"] == label)
                & (other_results["dataset"] == dataset)
                & (other_results["class"] == clazz)]
            group_results = group_results[["recall", "false_discovery_rate"]]
            group_results = group_results.groupby("recall").agg({
                "false_discovery_rate": 'min'
            }).reset_index()
            group_results = group_results.append(
                {
                    "recall": 0.,
                    "false_discovery_rate": 0.
                }, ignore_index=True)
            group_results = group_results.append(
                {
                    "recall": 1.,
                    "false_discovery_rate": 1.
                }, ignore_index=True)
            group_results = group_results.sort_values("recall")
            recall = group_results["recall"]
            fdr = group_results["false_discovery_rate"]
            auc_fdr = np.trapz(recall, fdr)

            # Plot False-Discovery Rate
            fig, ax = self.visualize(recall, fdr)
            ax.set_xlabel('False Discovery Rate (1 - Precision)')
            ax.set_ylabel('True Positive Rate (Sensitivity)')
            ax.plot(group_baseline_results["false_discovery_rate"],
                    group_baseline_results["recall"], "o")
            subtitle = "{} - Class {}, {} Aggregator".format(
                label, clazz, aggregator)
            plt.figtext(.5, .97, subtitle, fontsize=14, ha='center')
            plt.title(
                'True Positive Rate vs. False Discovery Rate (AUC {:1.2f})'.
                format(round(auc_fdr, 3)),
                y=1.17,
                fontsize=16)
            plt.legend([label, "Baseline"],
                       bbox_to_anchor=(0, 1, 1, 0.2),
                       loc="lower left",
                       ncol=2,
                       frameon=False)
            outdir = os.path.join(self.data_dir, "combined", "results", label,
                                  clazz)
            os.makedirs(outdir, exist_ok=True)
            outfile = os.path.join(
                outdir, "{}-auc-false-discovery.png".format("_".join(
                    aggregator.split())))
            print(outfile)
            plt.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
            plt.close()

    def visualize(self, tpr, fpr):
        f, ax = plt.subplots()
        ax.plot(fpr, tpr, marker='o')
        ax.set_ylim([0, 1])
        ax.set_xlim([0, max(fpr)])
        return f, ax

    def collect_results(self, directory):
        return glob.glob("{}/**/metrics.csv".format(directory),
                         recursive=True)
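
# Illustrative sketch of the AUC computed in CombinedAUCVisualizer.execute:
# np.trapz(y, x) integrates recall (y) with respect to false-discovery rate
# (x), which is why the curve is sorted before integrating. The numbers here
# are made up for demonstration; only the module-level numpy import is assumed.
def _example_auc_fdr():
    fdr = np.array([0.0, 0.1, 0.3, 1.0])
    recall = np.array([0.0, 0.6, 0.8, 1.0])
    return np.trapz(recall, fdr)  # trapezoidal area, approximately 0.80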
class BestThresholdVisualizer(BaseVisualizer):

    aggregator_map = dict(
        [("dummy", "")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    @staticmethod
    def label_bars(plot):
        # Annotate each bar with its height, just below the top of the bar.
        for rect in plot.patches:
            height = rect.get_height()
            plot.annotate(
                '{:1.2f}'.format(height),
                xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, -12),  # 12 points downward offset
                textcoords="offset points",
                ha='center',
                va='bottom')

    def execute_metrics(self):
        results = self.results.groupby("display_aggregator").agg({
            "f1-score": 'max',
            "iou_score": 'max'
        }).reset_index().sort_values(by=["display_aggregator"])
        plot = results.plot.bar(x="display_aggregator",
                                y=["f1-score", "iou_score"])
        BestThresholdVisualizer.label_bars(plot)
        title = "Best Metrics"
        subtitle = "{} - Class {}".format(self.label, self.clazz)
        fig = plot.get_figure()
        plt.title('')
        fig.suptitle(title, y=1.05, fontsize=14)
        plt.figtext(.5, .96, subtitle, fontsize=12, ha='center')
        plot.set_ylabel('Metric')
        plot.set_xlabel('')
        plt.legend(["F1-Score", "IOU"],
                   bbox_to_anchor=(0, 1, 1, 0.2),
                   loc="lower left",
                   ncol=2,
                   frameon=False)
        outfile = os.path.join(self.data_dir, "best_metrics.png")
        fig.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
        plt.close()

    def execute_threshold(self):
        results = self.results.groupby("display_aggregator").agg({
            "f1-score": 'max'
        }).reset_index()
        results = self.results.merge(results).sort_values(
            by=["display_aggregator"])
        plot = results.plot.bar(x="display_aggregator", y="threshold")
        BestThresholdVisualizer.label_bars(plot)
        title = "Best Threshold"
        subtitle = "{} - Class {}".format(self.label, self.clazz)
        fig = plot.get_figure()
        plt.title('')
        fig.suptitle(title, y=1.05, fontsize=14)
        plt.figtext(.5, .96, subtitle, fontsize=12, ha='center')
        plot.set_ylabel('Threshold')
        plot.set_xlabel('')
        plt.legend(["threshold"],
                   bbox_to_anchor=(0, 1, 1, 0.2),
                   loc="lower left",
                   ncol=1,
                   frameon=False)
        outfile = os.path.join(self.data_dir, "best_threshold.png")
        fig.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
        plt.close()

    def execute(self):
        csv_file = os.path.join(self.data_dir, "metrics.csv")
        self.clazz = self.data_dir.split("/")[-2]
        if not os.path.exists(csv_file):
            print("CSV file does not exist: {}".format(csv_file))
            return
        self.results = pd.read_csv(csv_file)
        self.results["display_aggregator"] = self.results["aggregator"].apply(
            lambda x: self.aggregator_map[x])
        self.execute_metrics()
        self.execute_threshold()
class CombinedF1Visualizer(BaseVisualizer):

    full_combined_visualizer = True

    aggregator_map = dict(
        [("dummy", "")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    def execute_result(self, result):
        result_data = pd.read_csv(result)
        job_hash = result.split("/")[-4]
        dataset = result.split("/")[-5]
        clazz = result.split("/")[-3]
        result_data["label"] = self.label_map[job_hash]
        result_data["dataset"] = dataset
        result_data["class"] = clazz
        return result_data

    def execute(self):
        df = None
        results = sorted(self.collect_results(self.data_dir))
        for result in mapper(self.execute_result, results):
            if df is None:
                df = result
            else:
                df = df.append(result)
        df["display_aggregator"] = df["aggregator"].apply(
            lambda x: self.aggregator_map[x])
        df = df.drop("aggregator", axis=1)
        mean_results = df.groupby(
            ["label", "dataset", "display_aggregator", "threshold",
             "class"]).agg({
                 "dice": "mean"
             }).reset_index()
        best_results = mean_results.groupby(
            ["label", "dataset", "display_aggregator", "class"]).agg({
                "dice": "max"
            }).reset_index()
        best_results = pd.merge(best_results,
                                mean_results,
                                on=list(best_results.columns),
                                how='inner')
        join_columns = list(best_results.columns)
        join_columns.remove("dice")
        filtered_results = pd.merge(best_results,
                                    df,
                                    on=join_columns,
                                    how='inner')
        filtered_results["dice"] = filtered_results["dice_y"]
        filtered_results = filtered_results.drop("dice_x", axis=1)
        filtered_results = filtered_results.drop("dice_y", axis=1)
        baseline_results = df[df["label"] == "Baseline"]

        sns.set(rc={'figure.figsize': (11, 2.5)})
        for aggregator in filtered_results["display_aggregator"].unique():
            if aggregator == "":
                continue
            aggregator_results = filtered_results[
                filtered_results["display_aggregator"] == aggregator]
            comparable_results = pd.concat(
                [aggregator_results, baseline_results])
            plot = sns.boxplot(x='class',
                               y='dice',
                               data=comparable_results,
                               hue='label')
            fig = plot.get_figure()
            plt.legend(bbox_to_anchor=(0, 1, 1, 0.2),
                       loc="lower left",
                       ncol=len(comparable_results["label"].unique()),
                       frameon=False)
            title = 'F1-Score by Model and Class, {} Aggregator'.format(
                aggregator)
            plt.title('')
            fig.suptitle(title, y=1.08, fontsize=14)
            plt.xlabel("Class")
            plt.ylabel("F1-Score")
            outdir = os.path.join(self.data_dir, "combined", "results")
            os.makedirs(outdir, exist_ok=True)
            outfile = os.path.join(
                outdir,
                "{}-f1-score.png".format(aggregator.replace(" ",
                                                            "_").lower()))
            fig.savefig(outfile, dpi=300, bbox_inches='tight', pad_inches=0.5)
            plt.close()

    def collect_results(self, directory):
        return glob.glob("{}/**/instance-metrics.csv".format(directory),
                         recursive=True)
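
# Illustrative sketch (synthetic data) of the mean -> max -> merge pattern used
# in CombinedF1Visualizer.execute to find, per aggregator and class, the
# threshold with the best mean dice. The function name and data are
# hypothetical; only the module-level pandas import is assumed.
def _example_best_threshold_selection():
    df = pd.DataFrame({
        "display_aggregator": ["Mean", "Mean", "Mean", "Mean"],
        "class": ["1", "1", "1", "1"],
        "threshold": [0.3, 0.3, 0.5, 0.5],
        "dice": [0.70, 0.80, 0.60, 0.65],
    })
    # Mean dice per threshold, then the best mean per aggregator/class.
    mean_results = df.groupby(["display_aggregator", "class",
                               "threshold"]).agg({
                                   "dice": "mean"
                               }).reset_index()
    best_results = mean_results.groupby(["display_aggregator", "class"]).agg({
        "dice": "max"
    }).reset_index()
    # Joining on all columns of best_results recovers the winning threshold.
    return pd.merge(best_results,
                    mean_results,
                    on=list(best_results.columns),
                    how='inner')  # one row: threshold 0.3, mean dice 0.75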
class ConfusionVisualizer(BaseVisualizer):

    job_combined_visualizer = True

    aggregator_map = dict(
        [("dummy", "")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    def execute_result(self, result):
        confusion = np.load(result)
        clazz = result.split("/")[-5]
        aggregator_name = result.split("/")[-3]
        threshold = result.split("/")[-2]
        return clazz, aggregator_name, threshold, confusion

    def execute_metrics(self, result):
        result_data = pd.read_csv(result)
        job_hash = result.split("/")[-4]
        dataset = result.split("/")[-5]
        clazz = result.split("/")[-3]
        result_data["label"] = self.label_map[job_hash]
        result_data["dataset"] = dataset
        result_data["class"] = clazz
        return result_data

    def find_best_thresholds(self):
        metrics_files = glob.glob("{}/**/metrics.csv".format(self.data_dir),
                                  recursive=True)
        results = sorted(metrics_files)
        df = None
        for result in mapper(self.execute_metrics, results):
            if df is None:
                df = result
            else:
                df = df.append(result)
        mean_results = df.groupby(
            ["label", "dataset", "aggregator", "threshold", "class"]).agg({
                "f1-score": "mean"
            }).reset_index()
        best_results = mean_results.groupby(
            ["label", "dataset", "aggregator", "class"]).agg({
                "f1-score": "max"
            }).reset_index()
        best_results = pd.merge(best_results,
                                mean_results,
                                on=list(best_results.columns),
                                how='inner')
        return best_results

    def execute(self):
        self.labels = ["background"] + self.job_config["CLASSES"]
        bt = self.find_best_thresholds()
        for aggregator in bt["aggregator"].unique():
            aggregator_results = bt[bt["aggregator"] == aggregator]
            outdir = os.path.join(self.data_dir, "combined", "results",
                                  aggregator)
            os.makedirs(outdir, exist_ok=True)
            outfile = os.path.join(outdir, 'confusion.png')
            confusion = np.zeros((len(self.labels), len(self.labels)),
                                 dtype=np.uint64)
            # Sum the per-class confusion matrices at each class's best
            # threshold for this aggregator.
            for clazz in aggregator_results["class"].unique():
                best_threshold = bt[(bt["aggregator"] == aggregator)
                                    & (bt["class"] == clazz)]["threshold"].iloc[0]
                confusion_file = os.path.join(self.data_dir, str(clazz),
                                              "results", aggregator,
                                              "{:.2f}".format(best_threshold),
                                              "confusion.npy")
                result_data = np.load(confusion_file)
                confusion += result_data
            # Convert each column to percentages of its total.
            confusion = confusion.astype(np.float64)
            confusion = np.round(
                confusion /
                (confusion.sum(axis=0) + np.finfo(confusion.dtype).eps) * 100,
                1)
            confusion_matrix_display = ConfusionMatrixDisplay(
                confusion, display_labels=self.labels).plot()
            for row in confusion_matrix_display.text_:
                for item in row:
                    item.set_fontsize(20)
            subtitle = "{}".format(self.label)
            ax = confusion_matrix_display.ax_
            for item in ([ax.xaxis.label, ax.yaxis.label] +
                         ax.get_xticklabels() + ax.get_yticklabels()):
                item.set_fontsize(14)
            fig = confusion_matrix_display.figure_
            plt.title('')
            fig.suptitle('Confusion Matrix', y=1, fontsize=14)
            plt.figtext(.5, .91, subtitle, fontsize=12, ha='center')
            fig.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
            plt.close()
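
# Illustrative sketch of the column normalization in ConfusionVisualizer:
# each column of the summed confusion matrix becomes a percentage of that
# column's total, with eps guarding against division by zero. The numbers are
# made up; only the module-level numpy import is assumed.
def _example_normalize_confusion():
    confusion = np.array([[50., 5.], [10., 35.]])
    eps = np.finfo(confusion.dtype).eps
    return np.round(confusion / (confusion.sum(axis=0) + eps) * 100, 1)
    # -> [[83.3, 12.5],
    #     [16.7, 87.5]]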