def test_desaturate():
    """Check desaturate() output for named, hex, and RGB-tuple inputs."""
    # (color spec, expected RGB after desaturating with prop=.5)
    cases = [
        ("red", (.75, .25, .25)),
        ("#00FF00", (.25, .75, .25)),
        ((0, 0, 1), (.25, .25, .75)),
        ("red", (.75, .25, .25)),
    ]
    for spec, expected in cases:
        result = utils.desaturate(spec, .5)
        assert result == expected
def _box_colors(vals, color):
    """Choose patch colors and a line gray for boxplots or violinplots.

    Parameters
    ----------
    vals : sized iterable
        one entry per box/violin; only its length and iteration are used
    color : None, mpl color, or palette specification
        None selects a husl palette; a value matplotlib can convert to RGB
        is repeated for every entry; anything else is handed to
        ``color_palette``

    Returns
    -------
    colors : list of RGB tuples
        one desaturated color per entry in ``vals``
    gray : tuple
        ``(g, g, g)`` where ``g`` is 0.6 times the lowest HLS lightness
        among ``colors``
    """
    to_rgb = mpl.colors.colorConverter.to_rgb
    n_colors = len(vals)

    if color is None:
        palette = husl_palette(n_colors, l=.7)
    else:
        try:
            single = to_rgb(color)
            palette = [single for _ in vals]
        except ValueError:
            # Not a single matplotlib color; treat it as a palette spec
            palette = color_palette(color, n_colors)

    # Desaturate a bit because these are patches
    colors = [desaturate(to_rgb(entry), .7) for entry in palette]

    # Determine the gray color for the lines
    darkest = min(colorsys.rgb_to_hls(*rgb)[1] for rgb in colors)
    level = darkest * .6
    gray = (level, level, level)

    return colors, gray
def test_desaturation_prop():
    """A desaturation proportion outside [0, 1] must raise ValueError."""
    pytest.raises(ValueError, utils.desaturate, "blue", 50)
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
        ("sticks" is accepted as an alias for "stick")
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the passed dict is not modified
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses ``plt.gca()``
    kwargs : additional keyword arguments
        accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` is an array with a dimensionality other than 1 or 2, or
        if `names` does not have one entry per violin

    """
    if ax is None:
        ax = plt.gca()

    # Work out axis labels and tick names from pandas metadata if present
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        ylabel = None
        vals = vals.values
    elif isinstance(vals, pd.Series) and groupby is not None:
        # A grouper without a ``name`` attribute simply gets no x label;
        # leaving xlabel unassigned here would fail when it is read below.
        xlabel = getattr(groupby, "name", None)
        if names is None:
            names = np.sort(pd.unique(groupby))
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    # Normalize the input into a list of 1D float arrays, one per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Resolve the fill color of each violin
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        if hasattr(color, "__iter__") and not isinstance(color, tuple):
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # The outline gray is tied to the lowest lightness among the fills
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    # Copy so that the pops below do not alter the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans ``widths``
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join the measures at the x locations where the violins were drawn
        join_x = [positions[i] for i in range(len(vals))]
        ax.plot(join_x, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def boxplot(vals, groupby=None, names=None, join_rm=False, color=None,
            alpha=None, fliersize=3, linewidth=1.5, widths=.8, ax=None,
            **kwargs):
    """Wrapper for matplotlib boxplot that allows better color control.

    Parameters
    ----------
    vals : sequence of data containers
        data for plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    color : mpl color, sequence of colors, or seaborn palette name
        inner box color
    alpha : float
        transparency of the inner box color
    fliersize : float, optional
        markersize for the fliers
    linewidth : float, optional
        width for the box outlines and whiskers
    ax : matplotlib axis, optional
        will plot in axis, otherwise uses ``plt.gca()``
    kwargs : additional keyword arguments to boxplot

    Returns
    -------
    ax : matplotlib axis
        axis where boxplot is plotted

    """
    if ax is None:
        ax = plt.gca()

    # Work out axis labels and tick names from pandas metadata if present
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values
        ylabel = None
    elif isinstance(vals, pd.Series) and groupby is not None:
        if names is None:
            names = np.sort(pd.unique(groupby))
        # A grouper without a ``name`` attribute simply gets no x label;
        # leaving xlabel unassigned here would fail when it is read below.
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    boxes = ax.boxplot(vals, patch_artist=True, widths=widths, **kwargs)

    # NOTE(review): after this transpose the meaning of len(vals) appears to
    # differ between 2D-array input and list-of-arrays input -- confirm that
    # it is the number of boxes in both cases.
    vals = np.atleast_2d(vals).T

    # Resolve the fill color of each box
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        if hasattr(color, "__iter__") and not isinstance(color, tuple):
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # The outline gray is tied to the lowest lightness among the fills
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    for i, box in enumerate(boxes["boxes"]):
        box.set_color(colors[i])
        if alpha is not None:
            box.set_alpha(alpha)
        box.set_edgecolor(gray)
        box.set_linewidth(linewidth)
    for whisk in boxes["whiskers"]:
        whisk.set_color(gray)
        whisk.set_linewidth(linewidth)
        whisk.set_linestyle("-")
    for cap in boxes["caps"]:
        cap.set_color(gray)
        cap.set_linewidth(linewidth)
    for med in boxes["medians"]:
        med.set_color(gray)
        med.set_linewidth(linewidth)
    for fly in boxes["fliers"]:
        fly.set_color(gray)
        fly.set_marker("d")
        fly.set_markeredgecolor(gray)
        fly.set_markersize(fliersize)

    if join_rm:
        ax.plot(range(1, len(vals.T) + 1), vals.T,
                color=gray, alpha=2. / 3)

    if names is not None:
        ax.set_xticklabels(names)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def boxplot(vals, groupby=None, names=None, join_rm=False, color=None,
            alpha=None, fliersize=3, linewidth=1.5, widths=.8, ax=None,
            **kwargs):
    """Wrapper for matplotlib boxplot that allows better color control.

    Parameters
    ----------
    vals : sequence of data containers
        data for plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers (or, for grouped
        Series input, the index of the grouped values)
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    color : mpl color, sequence of colors, or seaborn palette name
        inner box color
    alpha : float
        transparency of the inner box color
    fliersize : float, optional
        markersize for the fliers
    linewidth : float, optional
        width for the box outlines and whiskers
    ax : matplotlib axis, optional
        will plot in axis, otherwise uses ``plt.gca()``
    kwargs : additional keyword arguments to boxplot

    Returns
    -------
    ax : matplotlib axis
        axis where boxplot is plotted

    """
    if ax is None:
        ax = plt.gca()

    # Work out axis labels and tick names from pandas metadata if present
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values
        ylabel = None
    elif isinstance(vals, pd.Series) and groupby is not None:
        # A grouper without a ``name`` attribute simply gets no x label;
        # leaving xlabel unassigned here would fail when it is read below.
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            # Labels come from the same grouped object as the data so the
            # two stay aligned; pd.unique(groupby) would instead give the
            # keys in order of first appearance.
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    boxes = ax.boxplot(vals, patch_artist=True, widths=widths, **kwargs)

    # NOTE(review): after this transpose the meaning of len(vals) appears to
    # differ between 2D-array input and list-of-arrays input -- confirm that
    # it is the number of boxes in both cases.
    vals = np.atleast_2d(vals).T

    # Resolve the fill color of each box
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        if hasattr(color, "__iter__") and not isinstance(color, tuple):
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # The outline gray is tied to the lowest lightness among the fills
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    for i, box in enumerate(boxes["boxes"]):
        box.set_color(colors[i])
        if alpha is not None:
            box.set_alpha(alpha)
        box.set_edgecolor(gray)
        box.set_linewidth(linewidth)
    for whisk in boxes["whiskers"]:
        whisk.set_color(gray)
        whisk.set_linewidth(linewidth)
        whisk.set_linestyle("-")
    for cap in boxes["caps"]:
        cap.set_color(gray)
        cap.set_linewidth(linewidth)
    for med in boxes["medians"]:
        med.set_color(gray)
        med.set_linewidth(linewidth)
    for fly in boxes["fliers"]:
        fly.set_color(gray)
        fly.set_marker("d")
        fly.set_markeredgecolor(gray)
        fly.set_markersize(fliersize)

    if join_rm:
        ax.plot(range(1, len(vals.T) + 1), vals.T,
                color=gray, alpha=2. / 3)

    if names is not None:
        ax.set_xticklabels(names)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
        ("sticks" is accepted as an alias for "stick")
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers (or, for grouped
        Series input, the index of the grouped values)
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the passed dict is not modified
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses ``plt.gca()``
    kwargs : additional keyword arguments
        accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` is an array with a dimensionality other than 1 or 2, or
        if `names` does not have one entry per violin

    """
    if ax is None:
        ax = plt.gca()

    # Work out axis labels and tick names from pandas metadata if present
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        ylabel = None
        vals = vals.values
    elif isinstance(vals, pd.Series) and groupby is not None:
        # A grouper without a ``name`` attribute simply gets no x label;
        # leaving xlabel unassigned here would fail when it is read below.
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            # Labels and data come from the same grouped object
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    # Normalize the input into a list of 1D float arrays, one per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Resolve the fill color of each violin
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        if hasattr(color, "__iter__") and not isinstance(color, tuple):
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # The outline gray is tied to the lowest lightness among the fills
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    # Copy so that the pops below do not alter the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans ``widths``
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join the measures at the x locations where the violins were drawn
        join_x = [positions[i] for i in range(len(vals))]
        ax.plot(join_x, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax