Exemple #1
0
    def execute(self) -> None:
        """Run the evaluator for every job directory and every selected class.

        Each sub-directory of ``self.output_dir`` is treated as one job. Its
        configuration is loaded with ``config_from_dir`` and the list of
        folds and aggregators to evaluate is derived from that configuration,
        optionally narrowed by the ``classes``, ``folds`` and ``aggregators``
        entries of ``self.args`` (a ``None`` entry means "no filtering").
        """
        from segmenter.aggregators import Aggregators
        from segmenter.config import config_from_dir
        super(EvaluateTask, self).execute()

        job_dirs = [
            d for d in os.listdir(self.output_dir)
            if os.path.isdir(os.path.join(self.output_dir, d))
        ]

        if self.args["classes"] is not None:
            self.classes = [
                c for c in self.classes if c in self.args["classes"]
            ]

        for job_dir in job_dirs:
            # config_from_dir returns the hash alongside the config; keep it
            # in its own name instead of rebinding the loop variable.
            job_config, job_hash = config_from_dir(
                os.path.join(self.output_dir, job_dir))

            if job_config["FOLDS"] == 0:
                folds = ["all"]
            else:
                folds = [
                    "fold{}".format(o) for o in range(job_config["FOLDS"])
                ]

            if job_config["BOOST_FOLDS"] > 0:
                boost_folds = [
                    "b{}".format(o)
                    for o in range(job_config["BOOST_FOLDS"] + 1)
                ]
                # Combine the folds computed above for THIS job with the
                # boost suffixes. The previous code read ``self.folds`` here,
                # which discarded the per-job fold list built just above.
                folds = [
                    "".join(o)
                    for o in itertools.product(folds, boost_folds)
                ]

            if self.args["folds"] is not None:
                folds = [f for f in folds if f in self.args["folds"]]

            # Search jobs are always evaluated on "fold0" only; this
            # deliberately overrides any fold filtering done above.
            if job_config["SEARCH"]:
                folds = ["fold0"]

            # With at most one fold only the "dummy" aggregator is used.
            if len(folds) <= 1:
                aggregators = ["dummy"]
            else:
                aggregators = Aggregators.choices()

            if self.args["aggregators"] is not None:
                aggregators = [
                    a for a in aggregators if a in self.args["aggregators"]
                ]

            for clazz in self.classes:
                self.evaluator(clazz,
                               job_config,
                               job_hash,
                               self.data_dir,
                               self.output_dir,
                               self.weight_finder,
                               folds=folds,
                               aggregators=aggregators).execute()
Exemple #2
0
class CombinedAUCVisualizer(BaseVisualizer):
    """Plot recall vs. false-discovery-rate curves for non-baseline models.

    Every ``metrics.csv`` found below ``self.data_dir`` is loaded and tagged
    with a label, dataset and class taken from its path. One figure is then
    written per (aggregator, label, dataset, class) combination, with the
    rows labelled "Baseline" for the same dataset and class drawn as points.
    """

    # Maps the aggregator names found in the CSV "aggregator" column to the
    # names used in plot titles and output file names.
    aggregator_map = dict(
        [("dummy", "Dummy")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    dataset_combined_visualizer = True

    def execute_result(self, result):
        """Load one metrics CSV and reduce it to the columns used for plotting.

        Args:
            result: Path to a ``metrics.csv`` file. The path is split on "/"
                and its 5th-, 4th- and 3rd-from-last components are used as
                dataset, job hash and class respectively.

        Returns:
            A DataFrame with the columns ``recall``, ``false_positive_rate``,
            ``false_discovery_rate``, ``display_aggregator``, ``label``,
            ``dataset`` and ``class``.
        """
        result_data = pd.read_csv(result)

        path_parts = result.split("/")
        job_hash = path_parts[-4]
        dataset = path_parts[-5]
        clazz = path_parts[-3]

        result_data["false_positive_rate"] = 1 - result_data["specificity"]
        result_data["false_discovery_rate"] = 1 - result_data["precision"]
        result_data = result_data.round(2)
        # Copy the selection so the column assignments below operate on an
        # independent frame rather than on a slice of the loaded data.
        result_data = result_data[[
            "recall", "false_positive_rate", "false_discovery_rate",
            "aggregator"
        ]].copy()
        result_data["display_aggregator"] = result_data["aggregator"].apply(
            lambda x: self.aggregator_map[x])
        result_data = result_data.drop("aggregator", axis=1)

        result_data["label"] = self.label_map[job_hash]
        result_data["dataset"] = dataset
        result_data["class"] = clazz

        return result_data

    def execute(self):
        """Write one recall/FDR figure per non-baseline result group.

        Figures are saved as
        ``<data_dir>/combined/results/<label>/<class>/<aggregator>-auc-false-discovery.png``
        and each output path is printed. Nothing is plotted when no metrics
        files are found.
        """
        results = sorted(self.collect_results(self.data_dir))
        frames = list(mapper(self.execute_result, results))
        if not frames:
            # Without this guard the code below would index into ``None``.
            print("No metrics.csv files found in {}".format(self.data_dir))
            return
        # DataFrame.append was removed in pandas 2.0; concat gives the same
        # row-wise stacking (original indices are kept).
        df = pd.concat(frames)

        baseline_results = df[df["label"] == "Baseline"]
        other_results = df[df["label"] != "Baseline"]

        groups = other_results[[
            "display_aggregator", "label", "dataset", "class"
        ]].drop_duplicates()

        for _, group in groups.iterrows():
            clazz = group["class"]
            aggregator = group["display_aggregator"]
            dataset = group["dataset"]
            label = group["label"]

            # Baseline points: for each recall value keep the lowest FDR.
            group_baseline_results = baseline_results[
                (baseline_results["dataset"] == dataset)
                & (baseline_results["class"] == clazz)]
            group_baseline_results = group_baseline_results[[
                "recall", "false_discovery_rate"
            ]]
            group_baseline_results = group_baseline_results.groupby(
                "recall").agg({
                    "false_discovery_rate": 'min'
                }).reset_index()
            group_baseline_results = group_baseline_results.sort_values(
                "recall")

            # Curve for this group: again the lowest FDR per recall value.
            group_results = other_results[
                (other_results["display_aggregator"] == aggregator)
                & (other_results["label"] == label)
                & (other_results["dataset"] == dataset)
                & (other_results["class"] == clazz)]
            group_results = group_results[["recall", "false_discovery_rate"]]
            group_results = group_results.groupby("recall").agg({
                "false_discovery_rate":
                'min'
            }).reset_index()
            # Anchor the curve at (0, 0) and (1, 1) so the integral below
            # always spans the full range.
            endpoints = pd.DataFrame({
                "recall": [0., 1.],
                "false_discovery_rate": [0., 1.],
            })
            group_results = pd.concat([group_results, endpoints],
                                      ignore_index=True)
            group_results = group_results.sort_values("recall")
            recall = group_results["recall"]
            fdr = group_results["false_discovery_rate"]

            # Trapezoidal integral of recall (y) over FDR (x).
            auc_fdr = np.trapz(recall, fdr)

            # Plot False-Discovery Rate
            _, ax = self.visualize(recall, fdr)
            ax.set_xlabel('False Discovery Rate (1 - Precision)')
            ax.set_ylabel('True Positive Rate (Sensitivity)')
            ax.plot(group_baseline_results["false_discovery_rate"],
                    group_baseline_results["recall"], "o")
            subtitle = "{} - Class {}, {} Aggregator".format(
                label, clazz, aggregator)

            plt.figtext(.5, .97, subtitle, fontsize=14, ha='center')
            plt.title(
                'True Positive Rate vs. False Discovery Rate (AUC {:1.2f})'.
                format(round(auc_fdr, 3)),
                y=1.17,
                fontsize=16)

            plt.legend([label, "Baseline"],
                       bbox_to_anchor=(0, 1, 1, 0.2),
                       loc="lower left",
                       ncol=2,
                       frameon=False)

            outdir = os.path.join(self.data_dir, "combined", "results", label,
                                  clazz)
            os.makedirs(outdir, exist_ok=True)
            outfile = os.path.join(
                outdir, "{}-auc-false-discovery.png".format("_".join(
                    aggregator.split())))
            print(outfile)
            plt.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
            plt.close()

    def visualize(self, tpr, fpr):
        """Create a figure plotting ``tpr`` (y axis) against ``fpr`` (x axis).

        The y axis is fixed to [0, 1] and the x axis to [0, max(fpr)].

        Returns:
            The ``(figure, axes)`` pair from ``plt.subplots()``.
        """
        f, ax = plt.subplots()
        ax.plot(fpr, tpr, marker='o')
        ax.set_ylim([0, 1])
        ax.set_xlim([0, max(fpr)])
        return f, ax

    def collect_results(self, directory):
        """Return the paths of all ``metrics.csv`` files below ``directory``."""
        return glob.glob("{}/**/metrics.csv".format(directory), recursive=True)
Exemple #3
0
class BestThresholdVisualizer(BaseVisualizer):
    """Bar charts of the best metrics and best threshold per aggregator.

    Reads ``metrics.csv`` from ``self.data_dir`` and writes
    ``best_metrics.png`` and ``best_threshold.png`` into the same directory.
    """

    # Aggregator name as stored in the CSV -> name shown on the x axis.
    aggregator_map = {
        "dummy": "",
        **{
            agg.name(): agg.display_name()
            for agg in
            [Aggregators.get(c)(None) for c in Aggregators.choices()]
        },
    }

    @staticmethod
    def label_bars(plot):
        """Annotate every bar of ``plot`` with its height (two decimals)."""
        for bar in plot.patches:
            bar_height = bar.get_height()
            bar_center = bar.get_x() + bar.get_width() / 2
            plot.annotate(
                '{:1.2f}'.format(bar_height),
                xy=(bar_center, bar_height),
                # Shift the text 12 points down from the top of the bar.
                xytext=(0, -12),
                textcoords="offset points",
                ha='center',
                va='bottom')

    def _finish_plot(self, plot, title, ylabel, legend_labels, filename):
        """Label the bars, add titles and legend, then save and close."""
        BestThresholdVisualizer.label_bars(plot)

        subtitle = "{} - Class {}".format(self.label, self.clazz)

        fig = plot.get_figure()
        plt.title('')
        fig.suptitle(title, y=1.05, fontsize=14)
        plt.figtext(.5, .96, subtitle, fontsize=12, ha='center')
        plot.set_ylabel(ylabel)
        plot.set_xlabel('')

        plt.legend(legend_labels,
                   bbox_to_anchor=(0, 1, 1, 0.2),
                   loc="lower left",
                   ncol=len(legend_labels),
                   frameon=False)

        outfile = os.path.join(self.data_dir, filename)
        fig.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
        plt.close()

    def execute_metrics(self):
        """Plot the maximum f1-score and iou_score of each aggregator."""
        best_per_aggregator = self.results.groupby("display_aggregator").agg({
            "f1-score": 'max',
            "iou_score": 'max',
        })
        best_per_aggregator = best_per_aggregator.reset_index()
        best_per_aggregator = best_per_aggregator.sort_values(
            by=["display_aggregator"])

        bars = best_per_aggregator.plot.bar(x="display_aggregator",
                                            y=["f1-score", "iou_score"])
        self._finish_plot(bars, "Best Metrics", 'Metric',
                          ["F1-Score", "IOU"], "best_metrics.png")

    def execute_threshold(self):
        """Plot the threshold at which each aggregator reaches its best f1-score."""
        best_f1 = self.results.groupby("display_aggregator").agg({
            "f1-score": 'max',
        })
        best_f1 = best_f1.reset_index()
        # Merging on the shared columns keeps only the rows of the full
        # results whose f1-score equals the per-aggregator maximum.
        best_rows = self.results.merge(best_f1)
        best_rows = best_rows.sort_values(by=["display_aggregator"])

        bars = best_rows.plot.bar(x="display_aggregator", y="threshold")
        self._finish_plot(bars, "Best Threshold", 'Threshold',
                          ["threshold"], "best_threshold.png")

    def execute(self):
        """Load ``metrics.csv`` and produce both charts.

        The class name is taken from the second-to-last "/"-separated
        component of ``self.data_dir``. If the CSV file is missing a message
        is printed and nothing is plotted.
        """
        csv_file = os.path.join(self.data_dir, "metrics.csv")
        self.clazz = self.data_dir.split("/")[-2]
        if not os.path.exists(csv_file):
            print("CSV file does not exist {}".format(csv_file))
            return

        self.results = pd.read_csv(csv_file)
        raw_names = self.results["aggregator"]
        self.results["display_aggregator"] = raw_names.apply(
            lambda name: self.aggregator_map[name])

        self.execute_metrics()
        self.execute_threshold()
Exemple #4
0
class CombinedF1Visualizer(BaseVisualizer):
    """Box plots of per-instance dice scores by class, one figure per aggregator.

    Every ``instance-metrics.csv`` below ``self.data_dir`` is loaded. For
    each (label, dataset, aggregator, class) the threshold with the highest
    mean dice is selected, and the instance rows at that threshold are
    plotted next to the rows labelled "Baseline".
    """

    full_combined_visualizer = True

    # Aggregator name as stored in the CSV -> display name. The "dummy"
    # aggregator maps to "" and is skipped when plotting.
    aggregator_map = dict(
        [("dummy", "")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    def execute_result(self, result):
        """Load one instance-metrics CSV and tag it with label/dataset/class.

        Args:
            result: Path to the CSV file. The path is split on "/" and its
                5th-, 4th- and 3rd-from-last components are used as dataset,
                job hash and class respectively.

        Returns:
            The loaded DataFrame with ``label``, ``dataset`` and ``class``
            columns added.
        """
        result_data = pd.read_csv(result)

        path_parts = result.split("/")
        job_hash = path_parts[-4]
        dataset = path_parts[-5]
        clazz = path_parts[-3]
        result_data["label"] = self.label_map[job_hash]
        result_data["dataset"] = dataset
        result_data["class"] = clazz
        return result_data

    def execute(self):
        """Write ``<aggregator>-f1-score.png`` for every named aggregator.

        Figures go to ``<data_dir>/combined/results``. Nothing is plotted
        when no instance-metrics files are found.
        """
        results = sorted(self.collect_results(self.data_dir))
        frames = list(mapper(self.execute_result, results))
        if not frames:
            # Without this guard the column access below would hit ``None``.
            print("No instance-metrics.csv files found in {}".format(
                self.data_dir))
            return
        # DataFrame.append was removed in pandas 2.0; concat gives the same
        # row-wise stacking (original indices are kept).
        df = pd.concat(frames)
        df["display_aggregator"] = df["aggregator"].apply(
            lambda x: self.aggregator_map[x])
        df = df.drop("aggregator", axis=1)

        # Mean dice per threshold within each group ...
        mean_results = df.groupby(
            ["label", "dataset", "display_aggregator", "threshold",
             "class"]).agg({
                 "dice": "mean"
             }).reset_index()

        # ... and the best of those means per group.
        best_results = mean_results.groupby(
            ["label", "dataset", "display_aggregator", "class"]).agg({
                "dice":
                "max"
            }).reset_index()

        # Joining back on the maximum recovers the threshold that achieved it.
        best_results = pd.merge(best_results,
                                mean_results,
                                on=list(best_results.columns),
                                how='inner')

        join_columns = list(best_results.columns)
        join_columns.remove("dice")

        # Keep only the instance rows recorded at the best threshold. Both
        # frames carry a "dice" column, so the merge yields dice_x (the mean)
        # and dice_y (the per-instance value); only the latter is kept.
        filtered_results = pd.merge(best_results,
                                    df,
                                    on=join_columns,
                                    how='inner')
        filtered_results["dice"] = filtered_results["dice_y"]
        filtered_results = filtered_results.drop("dice_x", axis=1)
        filtered_results = filtered_results.drop("dice_y", axis=1)
        baseline_results = df[df["label"] == "Baseline"]

        sns.set(rc={'figure.figsize': (11, 2.5)})
        for aggregator in filtered_results["display_aggregator"].unique():
            if aggregator == "":
                continue
            aggregator_results = filtered_results[
                filtered_results["display_aggregator"] == aggregator]
            comparable_results = pd.concat(
                [aggregator_results, baseline_results])
            plot = sns.boxplot(x='class',
                               y='dice',
                               data=comparable_results,
                               hue='label')
            fig = plot.get_figure()

            plt.legend(bbox_to_anchor=(0, 1, 1, 0.2),
                       loc="lower left",
                       ncol=len(comparable_results["label"].unique()),
                       frameon=False)
            title = 'F1-Score by Model and Class, {} Aggregator'.format(
                aggregator)
            plt.title('')

            fig.suptitle(title, y=1.08, fontsize=14)
            plt.xlabel("Class")
            plt.ylabel("F1-Score")

            outdir = os.path.join(self.data_dir, "combined", "results")
            os.makedirs(outdir, exist_ok=True)

            outfile = os.path.join(
                outdir,
                "{}-f1-score.png".format(aggregator.replace(" ", "_").lower()))
            fig.savefig(outfile, dpi=300, bbox_inches='tight', pad_inches=0.5)
            plt.close()

    def collect_results(self, directory):
        """Return the paths of all ``instance-metrics.csv`` files below ``directory``."""
        return glob.glob("{}/**/instance-metrics.csv".format(directory),
                         recursive=True)
Exemple #5
0
class ConfusionVisualizer(BaseVisualizer):
    """Plot one combined confusion matrix per aggregator.

    For each aggregator, the confusion matrices stored for every class at
    that class's best threshold (highest mean f1-score) are summed,
    converted to column-wise percentages and saved as ``confusion.png``.
    """

    job_combined_visualizer = True

    # Aggregator name -> display name; not read by the methods of this class.
    aggregator_map = dict(
        [("dummy", "")] +
        [(a.name(), a.display_name())
         for a in [Aggregators.get(a)(None) for a in Aggregators.choices()]])

    def execute_result(self, result):
        """Load a saved confusion matrix and decode its path.

        Args:
            result: Path to a ``.npy`` file. The path is split on "/" and its
                5th-, 3rd- and 2nd-from-last components are returned as
                class, aggregator name and threshold.

        Returns:
            Tuple ``(clazz, aggregator_name, threshold, confusion)``.
        """
        confusion = np.load(result)

        path_parts = result.split("/")
        clazz = path_parts[-5]
        aggregator_name = path_parts[-3]
        threshold = path_parts[-2]

        return clazz, aggregator_name, threshold, confusion

    # NOTE: the method name is misspelled ("executre"); it is kept as-is
    # because code outside this class may refer to it.
    def executre_metrics(self, result):
        """Load one metrics CSV and tag it with label, dataset and class.

        Args:
            result: Path to a ``metrics.csv`` file. The path is split on "/"
                and its 5th-, 4th- and 3rd-from-last components are used as
                dataset, job hash and class respectively.

        Returns:
            The loaded DataFrame with ``label``, ``dataset`` and ``class``
            columns added.
        """
        result_data = pd.read_csv(result)

        path_parts = result.split("/")
        job_hash = path_parts[-4]
        dataset = path_parts[-5]
        clazz = path_parts[-3]
        result_data["label"] = self.label_map[job_hash]
        result_data["dataset"] = dataset
        result_data["class"] = clazz
        return result_data

    def find_best_thresholds(self):
        """Find the threshold with the highest mean f1-score per group.

        Returns:
            A DataFrame with one row per (label, dataset, aggregator, class)
            and best threshold, holding the columns ``label``, ``dataset``,
            ``aggregator``, ``class``, ``f1-score`` and ``threshold``. The
            frame is empty (same columns) when no ``metrics.csv`` is found
            below ``self.data_dir``.
        """
        metrics_files = glob.glob("{}/**/metrics.csv".format(self.data_dir),
                                  recursive=True)
        results = sorted(metrics_files)
        frames = list(mapper(self.executre_metrics, results))
        if not frames:
            # Without this guard ``groupby`` would be called on ``None``.
            print("No metrics.csv files found in {}".format(self.data_dir))
            return pd.DataFrame(columns=[
                "label", "dataset", "aggregator", "class", "f1-score",
                "threshold"
            ])
        # DataFrame.append was removed in pandas 2.0; concat gives the same
        # row-wise stacking (original indices are kept).
        df = pd.concat(frames)

        mean_results = df.groupby(
            ["label", "dataset", "aggregator", "threshold", "class"]).agg({
                "f1-score":
                "mean"
            }).reset_index()

        best_results = mean_results.groupby(
            ["label", "dataset", "aggregator", "class"]).agg({
                "f1-score": "max"
            }).reset_index()

        # Joining back on the maximum recovers the threshold that achieved it.
        best_results = pd.merge(best_results,
                                mean_results,
                                on=list(best_results.columns),
                                how='inner')

        return best_results

    def execute(self):
        """Write ``<data_dir>/combined/results/<aggregator>/confusion.png``.

        One figure is written per aggregator present in the best-threshold
        table; nothing is written when that table is empty.
        """
        self.labels = ["background"] + self.job_config["CLASSES"]
        bt = self.find_best_thresholds()

        for aggregator in bt["aggregator"].unique():
            aggregator_results = bt[bt["aggregator"] == aggregator]

            outdir = os.path.join(self.data_dir, "combined", "results",
                                  aggregator)
            os.makedirs(outdir, exist_ok=True)
            outfile = os.path.join(outdir, 'confusion.png')
            confusion = np.zeros((len(self.labels), len(self.labels)),
                                 dtype=np.uint64)

            # Sum the per-class matrices, each taken at that class's best
            # threshold for this aggregator.
            for clazz in aggregator_results["class"].unique():
                class_rows = aggregator_results[aggregator_results["class"] ==
                                                clazz]
                best_threshold = class_rows["threshold"].iloc[0]
                confusion_file = os.path.join(self.data_dir, str(clazz),
                                              "results", aggregator,
                                              "{:.2f}".format(best_threshold),
                                              "confusion.npy")
                result_data = np.load(confusion_file)
                confusion += result_data

            # Convert counts to percentages of each column total; eps keeps
            # all-zero columns from dividing by zero.
            confusion = confusion.astype(np.float64)
            confusion = np.round(
                confusion /
                (confusion.sum(axis=0) + np.finfo(confusion.dtype).eps) * 100,
                1)
            confusion_matrix_display = ConfusionMatrixDisplay(
                confusion, display_labels=self.labels).plot()

            for row in confusion_matrix_display.text_:
                for item in row:
                    item.set_fontsize(20)
            subtitle = "{}".format(self.label)

            ax = confusion_matrix_display.ax_
            for item in ([ax.xaxis.label, ax.yaxis.label] +
                         ax.get_xticklabels() + ax.get_yticklabels()):
                item.set_fontsize(14)
            fig = confusion_matrix_display.figure_

            plt.title('')
            fig.suptitle('Confusion Matrix', y=1, fontsize=14)
            plt.figtext(.5, .91, subtitle, fontsize=12, ha='center')

            fig.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
            plt.close()