Example #1
    def draw(self):
        """
        Renders the confusion matrix heatmap; must be called after score.
        """

        # Perform display related manipulations on the confusion matrix data
        cm_display = self.confusion_matrix_

        # Convert confusion matrix to percent of each row, i.e. the
        # predicted as a percent of true in each class.
        if self.percent is True:
            # Note: div_safe function returns 0 instead of NAN.
            cm_display = div_safe(self.confusion_matrix_,
                                  self.class_counts_.reshape(-1, 1))
            cm_display = np.round(cm_display * 100, decimals=0)

        # Y axis should be sorted top to bottom in pcolormesh
        cm_display = cm_display[::-1, ::]

        # Get the human readable labels
        labels = self._labels()
        if labels is None:
            labels = self.classes_

        # Set up the dimensions of the pcolormesh
        n_classes = len(labels)
        X, Y = np.arange(n_classes + 1), np.arange(n_classes + 1)
        self.ax.set_ylim(bottom=0, top=cm_display.shape[0])
        self.ax.set_xlim(left=0, right=cm_display.shape[1])

        # Fetch the grid labels from the classes in correct order; set ticks.
        xticklabels = labels
        yticklabels = labels[::-1]
        ticks = np.arange(n_classes) + 0.5

        self.ax.set(xticks=ticks, yticks=ticks)
        self.ax.set_xticklabels(xticklabels,
                                rotation="vertical",
                                fontsize=self.fontsize)
        self.ax.set_yticklabels(yticklabels, fontsize=self.fontsize)

        # Set data labels in the grid enumerating over all x,y class pairs.
        # NOTE: X and Y are one element longer than the confusion matrix, so
        # skip the last element in the enumeration to label grids.
        for x in X[:-1]:
            for y in Y[:-1]:

                # Extract the value and the text label
                value = cm_display[x, y]
                svalue = "{:0.0f}".format(value)
                if self.percent:
                    svalue += "%"

                # Determine the grid and text colors
                base_color = self.cmap(value / cm_display.max())
                text_color = find_text_color(base_color)

                # Make zero values more subtle
                if cm_display[x, y] == 0:
                    text_color = CMAP_MUTEDCOLOR

                # Add the label to the middle of the grid
                cx, cy = x + 0.5, y + 0.5
                self.ax.text(
                    cy,
                    cx,
                    svalue,
                    va="center",
                    ha="center",
                    color=text_color,
                    fontsize=self.fontsize,
                )

                # Add a dark line on the grid with the diagonal. Note that the
                # tick labels have already been reversed.
                lc = "k" if xticklabels[x] == yticklabels[y] else "w"
                self._edgecolors.append(lc)

        # Draw the heatmap with colors bounded by vmin,vmax
        vmin = 0.00001
        vmax = 99.999 if self.percent is True else cm_display.max()
        self.ax.pcolormesh(
            X,
            Y,
            cm_display,
            vmin=vmin,
            vmax=vmax,
            edgecolor=self._edgecolors,
            cmap=self.cmap,
            linewidth="0.01",
        )

        # Return the axes being drawn on
        return self.ax
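
This draw() method only handles plotting: it assumes that a prior score() call has already populated self.confusion_matrix_, self.class_counts_, self.classes_, and self.ax. The sketch below shows one way such a visualizer could be driven; the ConfusionMatrixVisualizer class name, its constructor, and the estimator used are illustrative assumptions, not the actual API of the code above.

# Hypothetical driver for the draw() method above. The wrapper class name and
# its fit/score methods are assumptions made for illustration only.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

model = LogisticRegression(max_iter=1000)
viz = ConfusionMatrixVisualizer(model, percent=True)  # assumed wrapper class
viz.fit(X_train, y_train)    # fits the wrapped estimator
viz.score(X_test, y_test)    # assumed to populate confusion_matrix_, class_counts_, classes_
ax = viz.draw()              # renders the heatmap and returns the matplotlib Axes
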
Example #2
    def ConfusionMatrixViz(self, percent=True):
        """
        Computes the confusion matrix from the stored y_true and y_pred,
        renders it as a heatmap, and saves the figure to disk.
        """
        labels = [0, 1]
        confusion_matrix_ = confusion_matrix_metric(self.y_true,
                                                    self.y_pred,
                                                    labels=labels)
        class_counts_ = dict(zip(*np.unique(self.y_true, return_counts=True)))

        # Make array of only the classes actually being used.
        # Needed because sklearn confusion_matrix only returns counts for
        # selected classes but percent should be calculated on all classes
        selected_class_counts = []
        for c in labels:
            try:
                selected_class_counts.append(class_counts_[c])
            except KeyError:
                selected_class_counts.append(0)
        class_counts_ = np.array(selected_class_counts)

        # Perform display related manipulations on the confusion matrix data
        cm_display = confusion_matrix_

        # Convert confusion matrix to percent of each row, i.e. the
        # predicted as a percent of true in each class.
        if percent is True:
            # Note: div_safe function returns 0 instead of NAN.
            cm_display = div_safe(confusion_matrix_,
                                  class_counts_.reshape(-1, 1))
            cm_display = np.round(cm_display * 100, decimals=0)

        # Y axis should be sorted top to bottom in pcolormesh
        cm_display = cm_display[::-1, ::]

        # Set up the dimensions of the pcolormesh
        n_classes = len(self.classes)
        X, Y = np.arange(n_classes + 1), np.arange(n_classes + 1)

        fig, ax = plt.subplots(ncols=1, nrows=1)
        ax.set_ylim(bottom=0, top=cm_display.shape[0])
        ax.set_xlim(left=0, right=cm_display.shape[1])

        # Fetch the grid labels from the classes in correct order; set ticks.
        xticklabels = self.classes
        yticklabels = self.classes[::-1]
        ticks = np.arange(n_classes) + 0.5

        ax.set(xticks=ticks, yticks=ticks)
        ax.set_xticklabels(xticklabels,
                           rotation="vertical",
                           fontsize=self.fontsize)
        ax.set_yticklabels(yticklabels, fontsize=self.fontsize)

        # Set data labels in the grid enumerating over all x,y class pairs.
        # NOTE: X and Y are one element longer than the confusion matrix, so
        # skip the last element in the enumeration to label grids.
        for x in X[:-1]:
            for y in Y[:-1]:

                # Extract the value and the text label
                value = cm_display[x, y]
                svalue = "{:0.0f}".format(value)
                if percent:
                    svalue += "%"

                # Determine the grid and text colors
                base_color = self.cmap(value / cm_display.max())
                text_color = find_text_color(base_color)

                # Make zero values more subtle
                if cm_display[x, y] == 0:
                    text_color = "0.75"

                # Add the label to the middle of the grid
                cx, cy = x + 0.5, y + 0.5
                ax.text(
                    cy,
                    cx,
                    svalue,
                    va="center",
                    ha="center",
                    color=text_color,
                    fontsize=self.fontsize,
                )

                # Add a dark line on the grid with the diagonal. Note that the
                # tick labels have already been reversed.
                lc = "k" if xticklabels[x] == yticklabels[y] else "w"
                self._edgecolors.append(lc)

        # Draw the heatmap with colors bounded by vmin,vmax
        vmin = 0.00001
        vmax = 99.999 if percent is True else cm_display.max()
        ax.pcolormesh(
            X,
            Y,
            cm_display,
            vmin=vmin,
            vmax=vmax,
            edgecolor=self._edgecolors,
            cmap=self.cmap,
            linewidth="0.01",
        )
        ax.set_title("Confusion Matrix for {}".format(self.name))
        ax.set_ylabel("True Class")
        ax.set_xlabel("Predicted Class")

        # Call tight layout to maximize readability
        fig.tight_layout()
        fig.savefig(self.path_to_save + "/ConfusionMatrix_" + self.name +
                    ".pdf")
        # Return the axes being drawn on
        return ax
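
Unlike the first example, this method builds the confusion matrix itself (here confusion_matrix_metric looks like sklearn's confusion_matrix imported under an alias), creates its own figure, and writes a PDF to self.path_to_save. A minimal sketch of a host class that would satisfy the attributes the method reads is shown below; the Evaluator name and every field default are assumptions for illustration, not part of the original code.

# Hypothetical host class for ConfusionMatrixViz above. All names and defaults
# here are illustrative assumptions mirroring the attributes the method reads.
import matplotlib.pyplot as plt
import numpy as np

class Evaluator:
    def __init__(self, y_true, y_pred, name, path_to_save="."):
        self.y_true = np.asarray(y_true)      # ground-truth labels (0/1)
        self.y_pred = np.asarray(y_pred)      # predicted labels (0/1)
        self.classes = [0, 1]                 # tick labels; must match labels=[0, 1]
        self.cmap = plt.cm.Blues              # colormap for cell fill and text color
        self.fontsize = 10
        self.name = name                      # used in the title and output filename
        self.path_to_save = path_to_save      # directory for the saved PDF
        self._edgecolors = []                 # cell edge colors appended while drawing

    # Attach the function from Example #2 as a method (assumes it is defined
    # at module level in the same script).
    ConfusionMatrixViz = ConfusionMatrixViz

# Usage sketch:
# ev = Evaluator(y_true=[0, 1, 1, 0], y_pred=[0, 1, 0, 0], name="baseline")
# ax = ev.ConfusionMatrixViz(percent=True)   # draws, saves the PDF, returns the Axes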