def anscombe():
    """
    Creates 2x2 grid plot of the 4 anscombe datasets for illustration.

    Each entry of ``ANSCOMBE`` supplies the x values at index 0 and the y
    values at index 1. Every panel shows the points as a scatter plot with a
    best fit line in the same color, using fixed (0, 15) limits on both axes.

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes ``(axa, axb, axc, axd)``: top row first, then the
        bottom row.
    """
    # Imported locally so the block stays self-contained.
    from itertools import cycle

    _, ((axa, axb), (axc, axd)) = plt.subplots(2, 2, sharex="col", sharey="row")
    axes = (axa, axb, axc, axd)

    # Repeat the palette: zip() stops at its shortest input, so a color cycle
    # with fewer than four entries would otherwise leave panels empty.
    colors = cycle(get_color_cycle())

    for arr, ax, color in zip(ANSCOMBE, axes, colors):
        x = arr[0]
        y = arr[1]

        # Set the X and Y limits
        ax.set_xlim(0, 15)
        ax.set_ylim(0, 15)

        # Draw the points in the scatter plot
        ax.scatter(x, y, c=color)

        # Draw the linear best fit line on the plot
        draw_best_fit(x, y, ax, c=color)

    return axes
def draw(self, y, y_pred):
    """
    Plots the predicted values against the actual values on ``self.ax``.

    The legend entry of the scatter shows the score stored in
    ``self.score_``, labelled with the estimator's ``scoring`` name when one
    is available and with R2 otherwise.

    Parameters
    ----------
    y : ndarray or Series of length n
        An array or series of target or class values

    y_pred : ndarray or Series of length n
        An array or series of predicted target values

    Returns
    -------
    ax : matplotlib Axes
        The axis with the plotted figure
    """
    # Estimators such as the cross validation ones may expose the metric
    # they score with through a ``scoring`` attribute. Underscores become
    # spaces and the result is capitalized. Any AttributeError raised on the
    # way (no such attribute, or a value without ``split``) selects R2.
    try:
        metric = self.estimator.scoring
        metric = ' '.join(metric.split('_')).capitalize()
    except AttributeError:
        metric = "R2"

    # R2 is rendered as mathtext in the legend.
    metric = "$R^2$" if metric == "R2" else metric
    legend_text = "{} $ = {:0.3f}$".format(metric, self.score_)

    self.ax.scatter(
        y,
        y_pred,
        c=self.colors["point"],
        alpha=self.alpha,
        label=legend_text,
    )

    # TODO If score happens inside a loop, draw gets called multiple times.
    # Ideally we'd want the best fit line to be drawn only once
    if self.bestfit:
        draw_best_fit(
            y,
            y_pred,
            self.ax,
            "linear",
            ls="--",
            lw=2,
            c=self.colors["line"],
            label="best fit",
        )

    # Use one range, spanning both the actual and the predicted values, for
    # the x axis and mirror it onto the y axis.
    # NOTE: shared_limits will be accounted for in finalize()
    if self.shared_limits is True:
        lower = min(min(y), min(y_pred))
        upper = max(max(y), max(y_pred))
        self.ax.set_xlim(lower, upper)
        self.ax.set_ylim(self.ax.get_xlim())

    return self.ax
def datasaurus():
    """
    Creates 2x2 grid plot of 4 from the Datasaurus Dozen datasets for illustration.

    Each entry of ``DATASAURUS`` supplies the x values at index 0 and the y
    values at index 1. Every panel shows the points as a scatter plot with a
    best fit line in the same color, using fixed limits of (0, 100) on the
    x axis and (0, 110) on the y axis.

    Citation:
    Justin Matejka, George Fitzmaurice (2017)
    Same Stats, Different Graphs: Generating Datasets with Varied Appearance and
    Identical Statistics through Simulated Annealing
    CHI 2017 Conference proceedings:
    ACM SIGCHI Conference on Human Factors in Computing Systems

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes ``(axa, axb, axc, axd)``: top row first, then the
        bottom row.
    """
    # Imported locally so the block stays self-contained.
    from itertools import cycle

    _, ((axa, axb), (axc, axd)) = plt.subplots(2, 2, sharex="col", sharey="row")
    axes = (axa, axb, axc, axd)

    # Repeat the palette: zip() stops at its shortest input, so a color cycle
    # with fewer than four entries would otherwise leave panels empty.
    colors = cycle(get_color_cycle())

    for arr, ax, color in zip(DATASAURUS, axes, colors):
        x = arr[0]
        y = arr[1]

        # Draw the points in the scatter plot
        ax.scatter(x, y, c=color)

        # Set the X and Y limits
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 110)

        # Draw the linear best fit line on the plot
        draw_best_fit(x, y, ax, c=color)

    return axes
def datasaurus():
    """
    Creates 2x2 grid plot of 4 from the Datasaurus Dozen datasets for illustration.

    Each entry of ``DATASAURUS`` supplies the x values at index 0 and the y
    values at index 1. Every panel shows the points as a scatter plot with a
    best fit line in the same color, using fixed limits of (0, 100) on the
    x axis and (0, 110) on the y axis.

    Citation:
    Justin Matejka, George Fitzmaurice (2017)
    Same Stats, Different Graphs: Generating Datasets with Varied Appearance and
    Identical Statistics through Simulated Annealing
    CHI 2017 Conference proceedings:
    ACM SIGCHI Conference on Human Factors in Computing Systems

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes ``(axa, axb, axc, axd)``: top row first, then the
        bottom row.
    """
    # Imported locally so the block stays self-contained.
    from itertools import cycle

    _, ((axa, axb), (axc, axd)) = plt.subplots(2, 2, sharex='col', sharey='row')
    axes = (axa, axb, axc, axd)

    # Repeat the palette: zip() stops at its shortest input, so a color cycle
    # with fewer than four entries would otherwise leave panels empty.
    colors = cycle(get_color_cycle())

    for arr, ax, color in zip(DATASAURUS, axes, colors):
        x = arr[0]
        y = arr[1]

        # Draw the points in the scatter plot
        ax.scatter(x, y, c=color)

        # Set the X and Y limits
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 110)

        # Draw the linear best fit line on the plot
        draw_best_fit(x, y, ax, c=color)

    return axes
def draw(self, y, y_pred):
    """
    Plots the predicted values against the actual values on ``self.ax``.

    The legend entry of the scatter shows the score stored in
    ``self.score_``, labelled as R2.

    Parameters
    ----------
    y : ndarray or Series of length n
        An array or series of target or class values

    y_pred : ndarray or Series of length n
        An array or series of predicted target values

    Returns
    -------
    ax : matplotlib Axes
        The axis with the plotted figure
    """
    legend_text = "$R^2 = {:0.3f}$".format(self.score_)

    self.ax.scatter(
        y,
        y_pred,
        c=self.colors["point"],
        alpha=self.alpha,
        label=legend_text,
    )

    # TODO If score happens inside a loop, draw gets called multiple times.
    # Ideally we'd want the best fit line to be drawn only once
    if self.bestfit:
        draw_best_fit(
            y,
            y_pred,
            self.ax,
            "linear",
            ls="--",
            lw=2,
            c=self.colors["line"],
            label="best fit",
        )

    # Pad the observed range of each axis by one unit on either side.
    # NOTE: shared_limits will be accounted for in finalize()
    # TODO: do better than add one for really small residuals
    x_low, x_high = y.min() - 1, y.max() + 1
    self.ax.set_xlim(x_low, x_high)

    y_low, y_high = y_pred.min() - 1, y_pred.max() + 1
    self.ax.set_ylim(y_low, y_high)

    return self.ax
def anscombe():
    """
    Creates 2x2 grid plot of the 4 anscombe datasets for illustration.

    Every panel shows one entry of ``ANSCOMBE`` as green scatter points with
    a best fit line, using fixed (0, 15) limits on both axes.

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes: top row first, then the bottom row.
    """
    # The figure handle is not needed, only the grid of axes.
    _, grid = plt.subplots(2, 2, sharex='col', sharey='row')
    (axa, axb), (axc, axd) = grid
    panels = (axa, axb, axc, axd)

    for data, panel in zip(ANSCOMBE, panels):
        xs, ys = data[0], data[1]

        # Same fixed window for every panel
        panel.set_xlim(0, 15)
        panel.set_ylim(0, 15)

        # Points first, then the linear best fit line over them
        panel.scatter(xs, ys, c='g')
        draw_best_fit(xs, ys, panel)

    return panels
def anscombe():
    """
    Creates 2x2 grid plot of the 4 anscombe datasets for illustration.

    Each entry of ``ANSCOMBE`` supplies the x values at index 0 and the y
    values at index 1. Every panel shows the points as a scatter plot with a
    best fit line in the same color, using fixed (0, 15) limits on both axes.

    Returns
    -------
    axes : tuple of matplotlib Axes
        The four axes ``(axa, axb, axc, axd)``: top row first, then the
        bottom row.
    """
    # Imported locally so the block stays self-contained.
    from itertools import cycle

    # The figure handle is not used, so it is not bound to a name.
    _, ((axa, axb), (axc, axd)) = plt.subplots(2, 2, sharex='col', sharey='row')
    axes = (axa, axb, axc, axd)

    # Repeat the palette: zip() stops at its shortest input, so a color cycle
    # with fewer than four entries would otherwise leave panels empty.
    colors = cycle(get_color_cycle())

    for arr, ax, color in zip(ANSCOMBE, axes, colors):
        x = arr[0]
        y = arr[1]

        # Set the X and Y limits
        ax.set_xlim(0, 15)
        ax.set_ylim(0, 15)

        # Draw the points in the scatter plot
        ax.scatter(x, y, c=color)

        # Draw the linear best fit line on the plot
        draw_best_fit(x, y, ax, c=color)

    return axes
def draw_joint(self, X, y, **kwargs):
    """
    Draws the visualization for the joint axis.

    Options are read from ``self.joint_args`` (``None`` is treated as an
    empty dict). Recognized keys are consumed here and every remaining key
    is forwarded to the matplotlib plotting call:

    - ``alpha``: defaults to 0.4 and is forwarded to the plotting call.
    - ``facecolor``: background of the joint axis, default ``"#dddddd"``.
    - scatter only: ``aspect`` (default ``"auto"``), ``fit`` (default
      ``True``, draw a best fit line) and ``estimator`` (default
      ``"linear"``, passed to ``draw_best_fit``).
    - hex only: ``x_bins`` and ``y_bins`` (default 50 each, averaged into
      the hexbin grid size) and ``cmap`` (default ``'Blues'``).

    For any other value of ``self.joint_plot`` only the face color is set.

    Parameters
    ----------
    X : ndarray or Series
        Values plotted along the x axis.

    y : ndarray or Series
        Values plotted along the y axis.

    kwargs : dict
        Accepted for interface compatibility; not used by this method.
    """
    # Kept from the original implementation: normalize a missing options
    # dict on the instance.
    if self.joint_args is None:
        self.joint_args = {}

    # Consume options from a copy. Popping from ``self.joint_args`` itself
    # would strip the user's settings, so a second call would silently fall
    # back to the defaults.
    joint_args = dict(self.joint_args)
    joint_args.setdefault("alpha", 0.4)

    facecolor = joint_args.pop("facecolor", "#dddddd")
    self.joint_ax.set_facecolor(facecolor)

    if self.joint_plot == "scatter":
        aspect = joint_args.pop("aspect", "auto")

        # The best-fit options must be removed before plotting: they are
        # not scatter() keyword arguments and must not be forwarded to it.
        fit = joint_args.pop("fit", True)
        estimator = joint_args.pop("estimator", "linear")

        self.joint_ax.set_aspect(aspect)
        self.joint_ax.scatter(X, y, **joint_args)

        if fit:
            draw_best_fit(X, y, self.joint_ax, estimator)

    elif self.joint_plot == "hex":
        x_bins = joint_args.pop("x_bins", 50)
        y_bins = joint_args.pop("y_bins", 50)
        colormap = joint_args.pop("cmap", 'Blues')

        # hexbin takes a single grid size here, so use the mean bin count.
        gridsize = int(np.mean([x_bins, y_bins]))

        xmin = X.min()
        xmax = X.max()
        ymin = y.min()
        ymax = y.max()

        self.joint_ax.hexbin(
            X, y, gridsize=gridsize, cmap=colormap, mincnt=1, **joint_args
        )

        # Clamp the view to the extent of the data.
        self.joint_ax.axis([xmin, xmax, ymin, ymax])
def draw_joint(self, X, y, **kwargs):
    """
    Draws the visualization for the joint axis.

    Options are read from ``self.joint_args`` (``None`` is treated as an
    empty dict). Recognized keys are consumed here and every remaining key
    is forwarded to the matplotlib plotting call:

    - ``alpha``: defaults to 0.4 and is forwarded to the plotting call.
    - ``facecolor``: background of the joint axis, default ``"#dddddd"``.
    - scatter only: ``aspect`` (default ``"auto"``), ``fit`` (default
      ``True``, draw a best fit line) and ``estimator`` (default
      ``"linear"``, passed to ``draw_best_fit``).
    - hex only: ``x_bins`` and ``y_bins`` (default 50 each, averaged into
      the hexbin grid size) and ``cmap`` (default ``'Blues'``).

    For any other value of ``self.joint_plot`` only the face color is set.

    Parameters
    ----------
    X : ndarray or Series
        Values plotted along the x axis.

    y : ndarray or Series
        Values plotted along the y axis.

    kwargs : dict
        Accepted for interface compatibility; not used by this method.
    """
    # Kept from the original implementation: normalize a missing options
    # dict on the instance.
    if self.joint_args is None:
        self.joint_args = {}

    # Consume options from a copy. Popping from ``self.joint_args`` itself
    # would strip the user's settings, so a second call would silently fall
    # back to the defaults.
    options = dict(self.joint_args)
    options.setdefault("alpha", 0.4)

    facecolor = options.pop("facecolor", "#dddddd")
    self.joint_ax.set_facecolor(facecolor)

    if self.joint_plot == "scatter":
        aspect = options.pop("aspect", "auto")

        # The best-fit options must be removed before plotting: they are
        # not scatter() keyword arguments and must not be forwarded to it.
        fit = options.pop("fit", True)
        estimator = options.pop("estimator", "linear")

        self.joint_ax.set_aspect(aspect)
        self.joint_ax.scatter(X, y, **options)

        if fit:
            draw_best_fit(X, y, self.joint_ax, estimator)

    elif self.joint_plot == "hex":
        x_bins = options.pop("x_bins", 50)
        y_bins = options.pop("y_bins", 50)
        colormap = options.pop("cmap", 'Blues')

        # hexbin takes a single grid size here, so use the mean bin count.
        gridsize = int(np.mean([x_bins, y_bins]))

        xmin = X.min()
        xmax = X.max()
        ymin = y.min()
        ymax = y.max()

        self.joint_ax.hexbin(
            X, y, gridsize=gridsize, cmap=colormap, mincnt=1, **options
        )

        # Clamp the view to the extent of the data.
        self.joint_ax.axis([xmin, xmax, ymin, ymax])