Esempio n. 1
0
def anscombe():
    """
    Plot the four Anscombe datasets on a 2x2 grid of axes.

    Every panel is limited to the range 0..15 on both axes and shows a
    scatter of the dataset together with a best fit line drawn by
    ``draw_best_fit``. Each panel takes the next color from
    ``get_color_cycle()``.

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes in row-major order (top-left, top-right,
        bottom-left, bottom-right).
    """
    _, grid = plt.subplots(2, 2, sharex="col", sharey="row")
    (axa, axb), (axc, axd) = grid
    panels = (axa, axb, axc, axd)
    palette = get_color_cycle()

    for dataset, panel, hue in zip(ANSCOMBE, panels, palette):
        xs, ys = dataset[0], dataset[1]

        # Fix the visible range first so every panel uses the same window
        panel.set_xlim(0, 15)
        panel.set_ylim(0, 15)

        # Points, then the fitted line in the same color
        panel.scatter(xs, ys, c=hue)
        draw_best_fit(xs, ys, panel, c=hue)

    return panels
Esempio n. 2
0
    def draw(self, y, y_pred):
        """
        Scatter the predicted values against the actual values on ``self.ax``.

        The legend label shows ``self.score_`` prefixed with the name of the
        scoring metric. An optional best fit line is added when
        ``self.bestfit`` is truthy, and the x/y limits are made identical
        when ``self.shared_limits`` is ``True``.

        Parameters
        ----------
        y : ndarray or Series of length n
            An array or series of target or class values

        y_pred : ndarray or Series of length n
            An array or series of predicted target values

        Returns
        -------
        ax : matplotlib Axes
            The axis with the plotted figure
        """
        # Estimators (notably cross-validation ones) may expose a ``scoring``
        # attribute naming the metric; turn e.g. "some_metric" into
        # "Some metric". Anything without a usable string there falls back
        # to the default R2 label.
        try:
            metric = self.estimator.scoring.replace("_", " ").capitalize()
        except AttributeError:
            metric = "R2"

        # Render R2 in math notation
        if metric == "R2":
            metric = "$R^2$"

        legend_text = "{} $ = {:0.3f}$".format(metric, self.score_)

        axes = self.ax
        axes.scatter(
            y,
            y_pred,
            c=self.colors["point"],
            alpha=self.alpha,
            label=legend_text,
        )

        # TODO If score happens inside a loop, draw gets called multiple times.
        # Ideally we'd want the best fit line to be drawn only once
        if self.bestfit:
            line_style = dict(ls="--", lw=2, c=self.colors["line"], label="best fit")
            draw_best_fit(y, y_pred, axes, "linear", **line_style)

        # With shared limits both axes span the overall min/max of the
        # actual and predicted values combined.
        # NOTE (carried over from the original): shared_limits is also
        # expected to be accounted for in finalize()
        if self.shared_limits is True:
            lowest = min(min(y), min(y_pred))
            highest = max(max(y), max(y_pred))
            axes.set_xlim(lowest, highest)
            axes.set_ylim(axes.get_xlim())

        return axes
Esempio n. 3
0
def datasaurus():
    """
    Plot four of the Datasaurus Dozen datasets on a 2x2 grid of axes.

    Each panel shows a scatter of one dataset plus a best fit line drawn by
    ``draw_best_fit``, with the x range fixed to 0..100 and the y range to
    0..110. Each panel takes the next color from ``get_color_cycle()``.

    Citation:
    Justin Matejka, George Fitzmaurice (2017)
    Same Stats, Different Graphs: Generating Datasets with Varied Appearance and
    Identical Statistics through Simulated Annealing
    CHI 2017 Conference proceedings:
    ACM SIGCHI Conference on Human Factors in Computing Systems

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes in row-major order.
    """
    _, grid = plt.subplots(2, 2, sharex="col", sharey="row")
    (axa, axb), (axc, axd) = grid
    panels = (axa, axb, axc, axd)
    palette = get_color_cycle()

    for dataset, panel, hue in zip(DATASAURUS, panels, palette):
        xs, ys = dataset[0], dataset[1]

        # Points first ...
        panel.scatter(xs, ys, c=hue)

        # ... then pin the visible window ...
        panel.set_xlim(0, 100)
        panel.set_ylim(0, 110)

        # ... and finally overlay the linear best fit line
        draw_best_fit(xs, ys, panel, c=hue)

    return panels
Esempio n. 4
0
def datasaurus():
    """
    Draw a 2x2 illustration grid using four Datasaurus Dozen datasets.

    For every dataset a scatter plot is drawn, the axis limits are set to
    x in 0..100 and y in 0..110, and a best fit line is added via
    ``draw_best_fit``. Colors come from ``get_color_cycle()``.

    Citation:
    Justin Matejka, George Fitzmaurice (2017)
    Same Stats, Different Graphs: Generating Datasets with Varied Appearance and
    Identical Statistics through Simulated Annealing
    CHI 2017 Conference proceedings:
    ACM SIGCHI Conference on Human Factors in Computing Systems

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes in row-major order.
    """
    _, rows = plt.subplots(2, 2, sharex="col", sharey="row")
    (axa, axb), (axc, axd) = rows
    all_axes = (axa, axb, axc, axd)

    for data, axis, shade in zip(DATASAURUS, all_axes, get_color_cycle()):
        x_values = data[0]
        y_values = data[1]

        # Scatter the raw points
        axis.scatter(x_values, y_values, c=shade)

        # Same window on every panel
        axis.set_xlim(0, 100)
        axis.set_ylim(0, 110)

        # Linear best fit line in the matching color
        draw_best_fit(x_values, y_values, axis, c=shade)

    return all_axes
    def draw(self, y, y_pred):
        """
        Scatter the predicted values against the actual values on ``self.ax``.

        The legend label reports ``self.score_`` as R^2. A dashed best fit
        line is added when ``self.bestfit`` is truthy. The x limits follow
        the range of ``y`` and the y limits the range of ``y_pred``, each
        padded by one unit on both sides.

        Parameters
        ----------
        y : ndarray or Series of length n
            An array or series of target or class values

        y_pred : ndarray or Series of length n
            An array or series of predicted target values

        Returns
        -------
        ax : matplotlib Axes
            The axis with the plotted figure
        """
        legend_text = "$R^2 = {:0.3f}$".format(self.score_)

        axes = self.ax
        axes.scatter(
            y,
            y_pred,
            c=self.colors["point"],
            alpha=self.alpha,
            label=legend_text,
        )

        # TODO If score happens inside a loop, draw gets called multiple times.
        # Ideally we'd want the best fit line to be drawn only once
        if self.bestfit:
            line_style = dict(ls="--", lw=2, c=self.colors["line"], label="best fit")
            draw_best_fit(y, y_pred, axes, "linear", **line_style)

        # Limits track the data range with a margin of one on each side.
        # NOTE (carried over from the original): shared_limits is expected
        # to be accounted for in finalize()
        # TODO: do better than add one for really small residuals
        x_low, x_high = y.min() - 1, y.max() + 1
        axes.set_xlim(x_low, x_high)
        y_low, y_high = y_pred.min() - 1, y_pred.max() + 1
        axes.set_ylim(y_low, y_high)

        return axes
Esempio n. 6
0
def anscombe():
    """
    Plot the four Anscombe datasets on a 2x2 grid of axes.

    Every panel is limited to 0..15 on both axes, shows the dataset as
    green scatter points and gets a best fit line from ``draw_best_fit``.

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes in row-major order.
    """
    # The figure handle itself is not needed, only the axes grid
    _, grid = plt.subplots(2, 2, sharex="col", sharey="row")
    (axa, axb), (axc, axd) = grid
    panels = (axa, axb, axc, axd)

    for dataset, panel in zip(ANSCOMBE, panels):
        xs, ys = dataset[0], dataset[1]

        # Same visible window on every panel
        panel.set_xlim(0, 15)
        panel.set_ylim(0, 15)

        # Points, then the linear best fit line
        panel.scatter(xs, ys, c="g")
        draw_best_fit(xs, ys, panel)

    return panels
Esempio n. 7
0
def anscombe():
    """
    Draw a 2x2 illustration grid of the four Anscombe datasets.

    Each panel is clamped to 0..15 on both axes and shows the scatter of
    one dataset with a best fit line from ``draw_best_fit``; the scatter
    and the line share a color taken from ``get_color_cycle()``.

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes in row-major order.
    """
    # The figure handle itself is not needed, only the axes grid
    _, rows = plt.subplots(2, 2, sharex="col", sharey="row")
    (axa, axb), (axc, axd) = rows
    all_axes = (axa, axb, axc, axd)

    for data, axis, shade in zip(ANSCOMBE, all_axes, get_color_cycle()):
        x_values = data[0]
        y_values = data[1]

        # Fix the X and Y window before drawing
        axis.set_xlim(0, 15)
        axis.set_ylim(0, 15)

        # Scatter points followed by the linear best fit line
        axis.scatter(x_values, y_values, c=shade)
        draw_best_fit(x_values, y_values, axis, c=shade)

    return all_axes
Esempio n. 8
0
    def draw_joint(self, X, y, **kwargs):
        """
        Draws the visualization for the joint axis.

        Renders onto ``self.joint_ax`` according to ``self.joint_plot``:

        - ``"scatter"``: a scatter plot of ``X`` against ``y`` and, unless
          the ``fit`` option is falsy, a best fit line from ``draw_best_fit``.
        - ``"hex"``: a hexbin plot whose grid size is the integer mean of
          the ``x_bins`` and ``y_bins`` options, with the axis window set to
          the min/max of ``X`` and ``y``.

        Options are read from ``self.joint_args``. The keys ``facecolor``,
        ``aspect``, ``fit``, ``estimator``, ``x_bins``, ``y_bins`` and
        ``cmap`` are consumed here; everything left over is forwarded to the
        matplotlib drawing call. ``alpha`` defaults to 0.4.

        Parameters
        ----------
        X : array-like
            Values for the horizontal axis. In "hex" mode it must provide
            ``min()`` and ``max()`` methods.

        y : array-like
            Values for the vertical axis. In "hex" mode it must provide
            ``min()`` and ``max()`` methods.

        kwargs : dict
            Accepted for interface compatibility; not used by this method.
        """

        if self.joint_args is None:
            self.joint_args = {}

        self.joint_args.setdefault("alpha", 0.4)

        # Consume options from a per-call copy so that the configuration
        # stored on the instance survives repeated draw_joint calls.
        options = dict(self.joint_args)

        facecolor = options.pop("facecolor", "#dddddd")
        self.joint_ax.set_facecolor(facecolor)

        if self.joint_plot == "scatter":
            aspect = options.pop("aspect", "auto")
            # Remove the non-matplotlib options BEFORE forwarding the rest
            # to scatter, otherwise matplotlib receives unknown keywords.
            fit = options.pop("fit", True)
            estimator = options.pop("estimator", "linear")

            self.joint_ax.set_aspect(aspect)
            self.joint_ax.scatter(X, y, **options)

            if fit:
                draw_best_fit(X, y, self.joint_ax, estimator)

        elif self.joint_plot == "hex":
            x_bins = options.pop("x_bins", 50)
            y_bins = options.pop("y_bins", 50)
            colormap = options.pop("cmap", 'Blues')
            gridsize = int(np.mean([x_bins, y_bins]))

            xmin = X.min()
            xmax = X.max()
            ymin = y.min()
            ymax = y.max()

            self.joint_ax.hexbin(X,
                                 y,
                                 gridsize=gridsize,
                                 cmap=colormap,
                                 mincnt=1,
                                 **options)
            self.joint_ax.axis([xmin, xmax, ymin, ymax])
Esempio n. 9
0
    def draw_joint(self, X, y, **kwargs):
        """
        Draws the visualization for the joint axis.

        Renders onto ``self.joint_ax`` according to ``self.joint_plot``:

        - ``"scatter"``: a scatter plot of ``X`` against ``y`` and, unless
          the ``fit`` option is falsy, a best fit line from ``draw_best_fit``.
        - ``"hex"``: a hexbin plot whose grid size is the integer mean of
          the ``x_bins`` and ``y_bins`` options, with the axis window set to
          the min/max of ``X`` and ``y``.

        Options are read from ``self.joint_args``. The keys ``facecolor``,
        ``aspect``, ``fit``, ``estimator``, ``x_bins``, ``y_bins`` and
        ``cmap`` are consumed here; everything left over is forwarded to the
        matplotlib drawing call. ``alpha`` defaults to 0.4.

        Parameters
        ----------
        X : array-like
            Values for the horizontal axis. In "hex" mode it must provide
            ``min()`` and ``max()`` methods.

        y : array-like
            Values for the vertical axis. In "hex" mode it must provide
            ``min()`` and ``max()`` methods.

        kwargs : dict
            Accepted for interface compatibility; not used by this method.
        """

        if self.joint_args is None:
            self.joint_args = {}

        self.joint_args.setdefault("alpha", 0.4)

        # Pop options from a throwaway copy: the instance keeps its full
        # configuration, so drawing twice gives the same result.
        remaining = dict(self.joint_args)

        facecolor = remaining.pop("facecolor", "#dddddd")
        self.joint_ax.set_facecolor(facecolor)

        if self.joint_plot == "scatter":
            aspect = remaining.pop("aspect", "auto")
            # "fit" and "estimator" are not matplotlib keywords, so they
            # must be taken out before the rest is passed on to scatter.
            fit = remaining.pop("fit", True)
            estimator = remaining.pop("estimator", "linear")

            self.joint_ax.set_aspect(aspect)
            self.joint_ax.scatter(X, y, **remaining)

            if fit:
                draw_best_fit(X, y, self.joint_ax, estimator)

        elif self.joint_plot == "hex":
            x_bins = remaining.pop("x_bins", 50)
            y_bins = remaining.pop("y_bins", 50)
            colormap = remaining.pop("cmap", 'Blues')
            gridsize = int(np.mean([x_bins, y_bins]))

            xmin = X.min()
            xmax = X.max()
            ymin = y.min()
            ymax = y.max()

            self.joint_ax.hexbin(X, y,
                gridsize=gridsize, cmap=colormap, mincnt=1, **remaining
            )
            self.joint_ax.axis([xmin, xmax, ymin, ymax])