def shade_y(y, x1, x2, where=None, color="tab:blue", label="", alpha=1, ax=None, **kws):
    """Shade the region between two x-curves along the y-axis.

    Parameters
    ----------
    y : {numpy.array, pandas.core.series.Series}
        The y-axis points along which the shade runs.
    x1 : {numpy.array, pandas.core.series.Series}
        The x-axis points forming the left edge of the shade.
    x2 : {numpy.array, pandas.core.series.Series}
        The x-axis points forming the right edge of the shade.
    where : array of bool, None by default
        Mask selecting the stretches to shade (True means shaded).
    color : string, 'tab:blue' by default
        The fill color.
    label : string, '' (empty) by default
        The label within a potential legend.
    alpha : float, 1 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `Axes.fill_betweenx`.

    Returns
    -------
    out
        Whatever `Axes.fill_betweenx` returns for the drawn shade.
    """
    # Only y and the axes go through check_data; x1/x2 are used as given.
    _, y, ax = check_data(None, y, ax)
    # TODO (ear): validate x1 and x2
    shade_options = dict(color=color, label=label, alpha=alpha, where=where)
    return ax.fill_betweenx(y, x1, x2, **shade_options, **kws)
def line(x, y, color="tab:blue", label="", style="-", size=1.5, alpha=1.0, ax=None, **kws):
    """Draw a curve through the points (x, y).

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The x-axis points of the curve.
    y : {numpy.array, pandas.core.series.Series}
        The y-axis points of the curve.
    color : string, 'tab:blue' by default
        The color of the curve.
    label : string, '' (empty) by default
        The label within a potential legend.
    style : string, '-' by default
        The line style of the curve (passed as `linestyle`).
    size : float, 1.5 by default
        The line width of the curve (passed as `linewidth`).
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `Axes.plot`.

    Returns
    -------
    out
        Whatever `Axes.plot` returns for the drawn curve.
    """
    x, y, ax = check_data(x, y, ax)
    line_options = dict(
        color=color,
        label=label,
        linestyle=style,
        linewidth=size,
        alpha=alpha,
    )
    return ax.plot(x, y, **line_options, **kws)
def scatter(x, y, color="tab:blue", label="", style="o", size=1.0, alpha=1.0, ax=None, **kws):
    """Draw the points (x, y) as a scatter plot.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The x-axis coordinates of the points.
    y : {numpy.array, pandas.core.series.Series}
        The y-axis coordinates of the points.
    color : string, 'tab:blue' by default
        The color of the points (passed as `c`).
    label : string, '' (empty) by default
        The label within a potential legend.
    style : string, 'o' by default
        The marker shape of the points.
    size : float, 1.0 by default
        Size level of the points. The value handed to matplotlib as `s` is
        6 ** (size + 1).
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `Axes.scatter`.

    Returns
    -------
    out
        Whatever `Axes.scatter` returns for the drawn points.
    """
    x, y, ax = check_data(x, y, ax)
    # Exponential mapping from the user-facing size level to matplotlib's `s`.
    marker_area = 6 ** (size + 1)
    point_options = dict(c=color, label=label, marker=style, s=marker_area, alpha=alpha)
    return ax.scatter(x, y, **point_options, **kws)
def autocorrelation(x, color="tab:blue", alpha=1, label="", ax=None, **kws):
    """Draw an autocorrelation plot: one vertical stem per lag.

    Each stem runs between 0 and the autocorrelation value at that lag.
    Lags are numbered from 1 along the x-axis.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The data whose autocorrelation (as computed by `_autocorrelation`)
        is drawn.
    color : string, 'tab:blue' by default
        The color of the stems.
    alpha : float, 1 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    label : string, '' (empty) by default
        The label within a potential legend.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `Axes.vlines`.

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        The `Axes` onto which the stems were drawn.
    """
    x, _, ax = check_data(x, None, ax)
    # assumes _autocorrelation returns a 1-D sequence, one value per lag
    acor = np.asarray(_autocorrelation(x))
    # TODO(ear) add standard error bars, 1.96/sqrt(len(X))
    # TODO(ear) add v/hline
    lags = np.arange(1, acor.shape[0] + 1)
    # Draw every stem in a single call so `label` yields exactly one legend
    # entry instead of one per lag. minimum/maximum against 0 orients each
    # stem correctly whether the value is positive or negative.
    ax.vlines(
        lags,
        ymin=np.minimum(acor, 0),
        ymax=np.maximum(acor, 0),
        color=color,
        alpha=alpha,
        label=label,
        **kws
    )
    return ax
def rug(x, y=0, color="tab:blue", label="", alpha=1.0, ax=None, **kws):
    """Draw a rug: one '|' tick per data value along a horizontal baseline.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The data whose values are marked along the x-axis.
    y : int, 0 by default
        The y-axis position of the rug's baseline.
    color : string, 'tab:blue' by default
        The color of the rug.
    label : string, '' (empty) by default
        The label within a potential legend.
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `Axes.plot`.

    Returns
    -------
    out
        Whatever `Axes.plot` returns for the drawn ticks.
    """
    x, _, ax = check_data(x, None, ax)
    # Every tick sits at the same height y.
    baseline = [y] * len(x)
    out = ax.plot(
        x,
        baseline,
        marker="|",
        linestyle="",
        c=color,
        label=label,  # previously accepted but never forwarded
        alpha=alpha,
        **kws
    )
    return out
def std_h(
    x,
    y,
    z=1.96,
    z_inner=0.675,
    color="tab:blue",
    label="",
    style="o",
    alpha=1.0,
    ax=None,
):
    """Draw horizontal standard deviation intervals.

    A thin bar spans mean +/- z * std, a thick bar spans
    mean +/- z_inner * std, and a marker is placed at the mean.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The vector of data for which the standard deviation interval is sought.
    y : int
        The location along the y-axis at which the horizontal interval is
        placed.
    z : float, 1.96 by default
        The number of standard deviations from the mean for the outer bound.
    z_inner : float, 0.675 by default
        The number of standard deviations from the mean for the inner bound.
    color : string, 'tab:blue' by default
        The color of the intervals and of the mean point.
    label : string, '' (empty) by default
        The label within a potential legend (attached to the mean point).
    style : string, 'o' by default
        The shape of the mean point.
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data` and then
        handed to every drawing helper.

    Returns
    -------
    out
        Whatever `point` returns for the mean marker.
    """
    x, _, ax = check_data(x, None, ax)
    xbar, std = np.mean(x), np.std(x)
    lw_mid, uw_mid = xbar - z_inner * std, xbar + z_inner * std
    lw, uw = xbar - z * std, xbar + z * std
    # Forward the resolved axes so all three artists land on the same Axes.
    line_h(y, lw, uw, color=color, alpha=alpha, ax=ax)
    # Use line_h's own `size` parameter for the width. Passing `lw=4` through
    # **kws gave matplotlib both `linewidth` and its alias `lw`.
    line_h(y, lw_mid, uw_mid, size=4, color=color, alpha=alpha, ax=ax)
    out = point(xbar, y, color=color, label=label, size=2, style=style, alpha=alpha, ax=ax)
    return out
def lv(
    x,
    y,
    color="tab:blue",
    label="",
    widths=0.8,
    p=0.007,
    scale="linear",
    k_depth="trustworthy",
    ax=None,
):
    """Draw vertical letter value plot.

    Nested boxes are built from successively more extreme percentile pairs
    of `y` and centred at `x`. A line marks the median, and values beyond
    the outermost percentile pair are drawn as '*' markers.

    Parameters
    ----------
    x : int
        The location along the x-axis at which the vertical letter value
        plot is placed.
    y : {numpy.array, pandas.core.series.Series}
        The vector of data for which the percentiles are sought.
    color : string, 'tab:blue' by default
        The color of the boxes. The boxes are shaded with a colormap running
        from white to this color; the outliers use this color directly.
    label : string, '' (empty) by default
        The label within a potential legend.
        NOTE(review): not used anywhere in the body.
    widths : float, 0.8 by default
        The width of the median box.
    p : float, 0.007 by default
        Enters the 'proportion' and 'trustworthy' formulas for the number
        of box levels.
    scale : string, 'linear' by default
        Options are exponential, linear, or area.
    k_depth : string, 'trustworthy' by default
        Options are proportion, tukey, or trustworthy.
    ax : matplotlib.pyplot.Axes, None by default
        The axis onto which the letter value plot is drawn. If left as None,
        matplotlib.pyplot.gca() is called to get the current `Axes`.

    Returns
    -------
    out : matplotlib.pyplot.Axes
        The `Axes` onto which the boxes were drawn.
    """
    _, y, ax = check_data(None, y, ax)
    if ax is None:
        ax = plt.gca()
    n = y.size
    # All three depth formulas are evaluated eagerly; only k_dict[k_depth]
    # is used afterwards.
    k_dict = {
        "proportion": (np.log2(n)) - int(np.log2(n * p)) + 1,
        "tukey": (np.log2(n)) - 3,
        "trustworthy": (np.log2(n) - np.log2(2 * stats.norm.ppf((1 - p)) ** 2)) + 1,
    }
    k = k_dict[k_depth]
    # int() raises ValueError for NaN; fall back to a single level then.
    # NOTE(review): an infinite value raises OverflowError, which is not
    # caught here.
    try:
        k = int(k)
    except ValueError:
        k = 1
    # Clamp k to a minimum of 1
    if k < 1.0:
        k = 1
    # Percentile pairs ordered from the most extreme (i = k) to the
    # quartiles (i = 0, i.e. 25 and 75).
    upper = [100 * (1 - 0.5 ** (i + 2)) for i in range(k, -1, -1)]
    lower = [100 * (0.5 ** (i + 2)) for i in range(k, -1, -1)]
    # Stitch the box ends together
    percentile_ends = [(i, j) for i, j in zip(lower, upper)]
    box_ends = [np.percentile(y, q) for q in percentile_ends]
    # Dictionary of functions for computing the width of the boxes
    # NOTE(review): the 'linear' rule gives a negative width for i = 0 and
    # a zero width for i = 1 — confirm this is intended.
    width_functions = {
        "linear": lambda h, i, k: (i - 1.0) / k,
        "exponential": lambda h, i, k: 2 ** (-k + i - 1),
        "area": lambda h, i, k: (1 - 2 ** (-k + i - 2)) / h,
    }
    # Anonymous functions for calculating the width and height
    # of the letter value boxes
    width = width_functions[scale]

    # Function to find height of boxes
    def height(b):
        return b[1] - b[0]

    # Functions to construct the letter value boxes
    def vert_perc_box(x, b, i, k, w):
        rect = Patches.FancyBboxPatch(
            (x - widths * w / 2, b[0]),
            widths * w,
            height(b),
            fill=True,
            boxstyle="round,pad=0.05",
        )
        return rect

    # NOTE(review): horz_perc_box is defined but never called in this
    # function.
    def horz_perc_box(x, b, i, k, w):
        rect = Patches.Rectangle(
            (b[0], x - widths * w / 2), height(b), widths * w, fill=True
        )
        return rect

    # Scale the width of the boxes so the biggest starts at 1
    w_area = np.array([width(height(b), i, k) for i, b in enumerate(box_ends)])
    w_area /= np.max(w_area)
    # Calculate the median
    median_y = np.median(y)
    # Calculate the outliers and plot
    # Outliers are the values outside the outermost percentile pair.
    perc_ends = (100 * (0.5 ** (k + 2)), 100 * (1 - 0.5 ** (k + 2)))
    edges = np.percentile(y, perc_ends)
    lower_out = y[np.where(y < edges[0])[0]]
    upper_out = y[np.where(y > edges[1])[0]]
    outliers = np.concatenate((lower_out, upper_out))
    boxes = [
        vert_perc_box(x, b[0], i, k, b[1]) for i, b in enumerate(zip(box_ends, w_area))
    ]
    # Plot the medians
    # boxes[k] is the last box built, i.e. the quartile (25-75) box; the
    # median line is given that box's width.
    half_width = boxes[k].get_extents().width / 2
    ax.plot([x - half_width, x + half_width], [median_y, median_y], c=".15", alpha=0.45)
    # Plot the outliers
    color = mpl.colors.to_hex(color)
    ax.scatter(np.repeat(x, len(outliers)), outliers, marker="*", c=color)
    # Colormap from white to the requested color, used to shade the boxes.
    rgb = [[1, 1, 1], mpl.colors.to_rgb(color)]
    cmap = mpl.colors.LinearSegmentedColormap.from_list("new_map", rgb)
    # NOTE(review): the first box is dropped (boxes[1:]) while the color
    # array below has len(boxes) entries — confirm the one-element mismatch
    # is intended.
    collection = PatchCollection(boxes[1:], cmap=cmap, edgecolors=([0, 0, 0, 0.45],))
    collection.set_array(np.array(np.linspace(0, 1, len(boxes))))
    ax.add_collection(collection)
    return ax
def box_h(x, y, color="tab:blue", label="", style="o", alpha=1.0, ax=None, **kws):
    """Draw horizontal box plot.

    A thin bar spans the whiskers, a thick bar spans the first to third
    quartile, a marker sits at the median, and values beyond the whiskers
    are drawn as '*' markers.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The vector of data for which the standard five number summary is
        sought (computed by `_bx`).
    y : scalar
        The location along the y-axis at which the horizontal box is placed.
    color : string, 'tab:blue' by default
        The color of the box.
    label : string, '' (empty) by default
        The label within a potential legend (attached to the median point).
    style : string, 'o' by default
        The shape of the median within the box.
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data` and then
        handed to every drawing helper.
    **kws
        Accepted for signature compatibility; currently not used.

    Returns
    -------
    out
        Whatever `point` returns for the median marker.
    """
    x, _, ax = check_data(x, None, ax)
    q1, q2, q3, lw, uw = _bx(x)
    # Forward the resolved axes so every artist lands on the same Axes.
    line_h(y, lw, uw, size=2, color=color, alpha=alpha, ax=ax)
    line_h(y, q1, q3, size=5, color=color, alpha=alpha, ax=ax)
    # Values strictly beyond the whiskers are the outliers.
    u_outs = x[x > uw]
    l_outs = x[x < lw]
    point(u_outs, np.repeat(y, u_outs.size), style="*", color=color, alpha=alpha, ax=ax)
    point(l_outs, np.repeat(y, l_outs.size), style="*", color=color, alpha=alpha, ax=ax)
    out = point(q2, y, size=2, style=style, color=color, label=label, alpha=alpha, ax=ax)
    return out
def histogram(
    x,
    color="tab:blue",
    label="",
    style="-",
    size=1.5,
    alpha=1.0,
    bins="auto",
    ax=None,
    **kws
):
    """Draw a histogram.

    By default the histogram is drawn as an unfilled outline
    (`histtype="step"`) and normalised (`density=True`).

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The vector of data from which the histogram is drawn.
    color : string, 'tab:blue' by default
        The color of the outline of the histogram.
    label : string, '' (empty) by default
        The label within a potential legend.
    style : string, '-' by default
        The line style of the outline (passed as `linestyle`).
    size : float, 1.5 by default
        The line width of the outline (passed as `linewidth`).
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    bins : {int, string, sequence}, 'auto' by default
        The bin specification handed to `Axes.hist`. The strings
        'bayesian_blocks' and 'bb' select bins computed by
        `bayesian_blocks(x)`.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments for `Axes.hist`. They take precedence over
        the defaults set here (e.g. `density`, `histtype`).

    Returns
    -------
    (n, bins, patches) : 3-tuple
        The values returned by `Axes.hist`.
    """
    x, _, ax = check_data(x, None, ax)
    # Only compare against the special names when bins is a string. An array
    # of bin edges would otherwise be compared element-wise and fail in `if`.
    if isinstance(bins, str) and bins in ("bayesian_blocks", "bb"):
        bins = bayesian_blocks(x)
    hist_options = dict(
        color=color,
        label=label,
        linestyle=style,
        linewidth=size,
        alpha=alpha,
        density=True,
        histtype="step",
        bins=bins,
    )
    # Caller-supplied options were silently dropped before; let them
    # override the defaults instead.
    hist_options.update(kws)
    n, bins, patches = ax.hist(x, **hist_options)
    # The documented return value was never actually returned.
    return n, bins, patches
def lag(x, lag=1, color="tab:blue", label="", style="o", size=1.0, alpha=1.0, ax=None, **kws):
    """Draw lag plot.

    Each value of `x` is plotted on the x-axis against the value `lag`
    positions later on the y-axis.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The sequence of data whose lagged pairs are drawn.
    lag : int, 1 by default
        The lag of the plot, used as a slice offset into `x`.
    color : string, 'tab:blue' by default
        The color of the points.
    label : string, '' (empty) by default
        The label within a potential legend.
    style : string, 'o' by default
        The shape of the points.
    size : float, 1.0 by default
        The size of the points, forwarded to `point`.
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `point`.

    Returns
    -------
    out
        Whatever `point` returns for the drawn points.
    """
    x, _, ax = check_data(x, None, ax)
    # x[:-0] is empty while x[0:] is the full series, so a zero lag needs
    # its own branch to keep both halves the same length.
    y1 = x[:-lag] if lag else x
    y2 = x[lag:]
    out = point(y1, y2, color=color, label=label, style=style, size=size, alpha=alpha, ax=ax, **kws)
    return out
def line_h(y, xmin=0, xmax=1, color="tab:blue", label="", style="-", size=1.5, alpha=1.0, ax=None, **kws):
    """Draw a horizontal line at height y from xmin to xmax.

    Parameters
    ----------
    y : scalar
        The y-axis value at which the horizontal line is drawn.
    xmin : scalar, 0 by default
        The x-axis value where the line starts.
    xmax : scalar, 1 by default
        The x-axis value where the line ends.
    color : string, 'tab:blue' by default
        The color of the line.
    label : string, '' (empty) by default
        The label within a potential legend.
    style : string, '-' by default
        The line style (passed as `linestyle`).
    size : float, 1.5 by default
        The line width (passed as `linewidth`).
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Extra keyword arguments handed unchanged to `Axes.hlines`.

    Returns
    -------
    out
        Whatever `Axes.hlines` returns for the drawn line.
    """
    _, y, ax = check_data(None, y, ax)
    line_options = dict(
        color=color,
        label=label,
        linestyle=style,
        linewidth=size,
        alpha=alpha,
    )
    return ax.hlines(y, xmin, xmax, **line_options, **kws)
def jitter(
    x,
    y,
    jitter_x=0.4,
    jitter_y=0.4,
    color="tab:blue",
    label="",
    style="o",
    size=1.0,
    alpha=1.0,
    ax=None,
    **kws
):
    """Draw jittered points.

    Uniform random noise is added to both coordinates before plotting. The
    noise on each axis is scaled by the smallest gap between distinct values
    on that axis (or by 1 when there is only one distinct value).

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The x-axis data of the points.
    y : {numpy.array, pandas.core.series.Series}
        The y-axis data of the points.
    jitter_x : float, 0.4 by default
        Multiplier on the x-axis noise, which is drawn from
        +/- half the x resolution.
    jitter_y : float, 0.4 by default
        Multiplier on the y-axis noise, which is drawn from
        +/- half the y resolution.
    color : string, 'tab:blue' by default
        The color of the points.
    label : string, '' (empty) by default
        The label within a potential legend.
    style : string, 'o' by default
        The shape of the points to draw.
    size : float, 1.0 by default
        The size of the points, forwarded to `scatter`.
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data` and then
        handed to `scatter`.
    **kws
        Extra keyword arguments handed unchanged to `scatter`.

    Returns
    -------
    out
        Whatever `scatter` returns for the drawn points.
    """
    x, y, ax = check_data(x, y, ax)
    # np.unique already returns sorted values. The `or 1` fallback covers a
    # single distinct value, where the list of gaps is empty.
    resolution_x = np.min(np.diff(np.unique(x)).tolist() or 1)
    # The y resolution must come from y; it was previously computed from x.
    resolution_y = np.min(np.diff(np.unique(y)).tolist() or 1)
    r_x = resolution_x / 2
    r_y = resolution_y / 2
    jx = jitter_x * np.random.uniform(low=-r_x, high=r_x, size=x.shape[0])
    jy = jitter_y * np.random.uniform(low=-r_y, high=r_y, size=y.shape[0])
    out = scatter(
        x + jx,
        y + jy,
        color=color,
        label=label,
        style=style,
        size=size,
        alpha=alpha,
        ax=ax,  # draw on the resolved axes
        **kws
    )
    return out
def percentile_h(x, y, outer=0.8, inner=0.5, color="tab:blue", label="", style="o", alpha=1, ax=None, **kws):
    """Draw horizontal percentile interval.

    A thin bar spans the central `outer` fraction of the data, a thick bar
    spans the central `inner` fraction, and a marker sits at the median.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The vector of data for which the percentile intervals are sought.
    y : int
        The location along the y-axis at which the interval is placed.
    outer : float, 0.8 by default
        The outer interval fraction (0.8 covers the 10th to 90th percentile).
    inner : float, 0.5 by default
        The inner interval fraction (0.5 covers the 25th to 75th percentile).
    color : string, 'tab:blue' by default
        The color of the intervals and of the median point.
    label : string, '' (empty) by default
        The label within a potential legend (attached to the median point).
    style : string, 'o' by default
        The shape of the median point.
    alpha : float, 1 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data` and then
        handed to every drawing helper.
    **kws
        Accepted for signature compatibility; currently not used.

    Returns
    -------
    out
        Whatever `point` returns for the median marker.
    """
    x, _, ax = check_data(x, None, ax)
    # Probability mass left in each tail outside the outer / inner interval.
    tail_outer, tail_inner = (1 - outer) / 2, (1 - inner) / 2
    fractions = np.array([tail_outer, tail_inner, 0.5, 1 - tail_inner, 1 - tail_outer])
    # np.percentile expects percentages, hence the factor of 100.
    q_lo, q_lo_mid, q_med, q_hi_mid, q_hi = np.percentile(x, fractions * 100)
    # Forward the resolved axes so every artist lands on the same Axes.
    line_h(y, q_lo, q_hi, size=2, color=color, alpha=alpha, ax=ax)
    line_h(y, q_lo_mid, q_hi_mid, size=5, color=color, alpha=alpha, ax=ax)
    out = point(q_med, y, size=2, style=style, color=color, label=label, alpha=alpha, ax=ax)
    return out
def violin_h(
    x,
    y,
    color="tab:blue",
    label="",
    bw_method=None,
    n=101,
    top_half=True,
    bottom_half=True,
    ax=None,
    **kws
):
    """Draw a horizontal violin plot of x at height y.

    Parameters
    ----------
    x : {numpy.array, pandas.core.series.Series}
        The vector of data for which the violin plot is sought.
    y : int
        The location along the y-axis at which the horizontal violin is
        placed.
    color : string, 'tab:blue' by default
        The face color of the violin body. A falsy value leaves
        matplotlib's own color in place.
    label : string, '' (empty) by default
        The label within a potential legend. Not used by this function.
    bw_method : string, None by default
        The bandwidth estimator method, handed to `Axes.violinplot`.
    n : int, 101 by default
        The number of interpolation points (passed as `points`).
    top_half : bool, True by default
        Whether to draw the part of the violin above `y`. Set to False to
        keep only the bottom half.
    bottom_half : bool, True by default
        Whether to draw the part of the violin below `y`. Set to False to
        keep only the top half.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Accepted for signature compatibility. Not used by this function.

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        The `Axes` onto which the violin was drawn.
    """
    x, _, ax = check_data(x, None, ax)
    violin_options = dict(
        positions=[y],
        bw_method=bw_method,
        vert=False,
        showmeans=False,
        showextrema=False,
        showmedians=False,
        points=n,
    )
    parts = ax.violinplot(x, **violin_options)
    body = parts["bodies"][0]
    if color:
        body.set_facecolor(color)
    if not (top_half and bottom_half):
        # Flatten the unwanted half by clamping the outline's y-coordinates
        # in place onto the centre line y.
        outline = body.get_paths()[0].vertices
        if not top_half:
            outline[:, 1] = np.clip(outline[:, 1], -np.inf, y)
        if not bottom_half:
            outline[:, 1] = np.clip(outline[:, 1], y, np.inf)
    return ax
def violin(
    x,
    y,
    color="tab:blue",
    label="",
    bw_method=None,
    n=101,
    left_half=True,
    right_half=True,
    ax=None,
    **kws
):
    """Draw a vertical violin plot of y at position x.

    Parameters
    ----------
    x : int
        The location along the x-axis at which the vertical violin is placed.
    y : {numpy.array, pandas.core.series.Series}
        The vector of data for which the violin plot is sought.
    color : string, 'tab:blue' by default
        The face color of the violin bodies, which are also made fully
        opaque. A falsy value leaves matplotlib's own styling in place.
    label : string, '' (empty) by default
        The label within a potential legend. Not used by this function.
    bw_method : string, None by default
        The bandwidth estimator method, handed to `Axes.violinplot`.
    n : int, 101 by default
        The number of interpolation points (passed as `points`).
    left_half : bool, True by default
        Whether to draw the part of the violin left of `x`. Set to False to
        keep only the right half.
    right_half : bool, True by default
        Whether to draw the part of the violin right of `x`. Set to False to
        keep only the left half.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data`.
    **kws
        Accepted for signature compatibility. Not used by this function.

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        The `Axes` onto which the violin was drawn.
    """
    _, y, ax = check_data(None, y, ax)
    violin_options = dict(
        positions=[x],
        bw_method=bw_method,
        showmeans=False,
        showextrema=False,
        showmedians=False,
        points=n,
    )
    parts = ax.violinplot(y, **violin_options)
    bodies = parts["bodies"]
    if color:
        for body in bodies:
            body.set_facecolor(mpl.colors.to_rgb(color))
            body.set_alpha(1)
    if not (left_half and right_half):
        # Flatten the unwanted half by clamping the outline's x-coordinates
        # in place onto the centre line x.
        outline = bodies[0].get_paths()[0].vertices
        if not left_half:
            outline[:, 0] = np.clip(outline[:, 0], x, np.inf)
        if not right_half:
            outline[:, 0] = np.clip(outline[:, 0], -np.inf, x)
    return ax
def mad(
    x,
    y,
    z=1.96,
    z_inner=0.675,
    color="tab:blue",
    label="",
    style="o",
    alpha=1.0,
    ax=None,
):
    """Draw vertical median absolute deviation intervals.

    A thin bar spans median +/- z * scale, a thick bar spans
    median +/- z_inner * scale, and a marker sits at the median, where
    scale is `mad_std(y) / norm.ppf(0.75)`.

    Parameters
    ----------
    x : int
        The location along the x-axis at which the vertical interval is
        placed.
    y : {numpy.array, pandas.core.series.Series}
        The vector of data for which the deviation interval is sought.
    z : float, 1.96 by default
        The number of scale units from the median for the outer bound.
    z_inner : float, 0.675 by default
        The number of scale units from the median for the inner bound.
    color : string, 'tab:blue' by default
        The color of the intervals and of the median point.
    label : string, '' (empty) by default
        The label within a potential legend (attached to the median point).
    style : string, 'o' by default
        The shape of the median point.
    alpha : float, 1.0 by default
        The transparency of the color. Values between 0 (transparent) and
        1 (opaque) are allowed.
    ax : matplotlib.pyplot.Axes, None by default
        The axis to draw onto. It is resolved through `check_data` and then
        handed to every drawing helper.

    Returns
    -------
    out
        Whatever `point` returns for the median marker.
    """
    _, y, ax = check_data(None, y, ax)
    med = np.median(y)
    # NOTE(review): if mad_std is astropy.stats.mad_std, its result is
    # already scaled to estimate the standard deviation, so the extra
    # division by norm.ppf(0.75) would rescale it a second time — confirm.
    scale = mad_std(y) / norm.ppf(0.75)
    lw_mid, uw_mid = med - z_inner * scale, med + z_inner * scale
    lw, uw = med - z * scale, med + z * scale
    # Forward the resolved axes so every artist lands on the same Axes.
    line_v(x, lw, uw, size=2, color=color, alpha=alpha, ax=ax)
    line_v(x, lw_mid, uw_mid, size=5, color=color, alpha=alpha, ax=ax)
    out = point(x, med, size=2, style=style, color=color, label=label, alpha=alpha, ax=ax)
    return out