def confidence_interval(data, confidence=0.99):
    """Compute a Student-t confidence interval around the mean of `data`.

    Parameters
    ----------
    data : array-like
        The sample; it is passed through `as_flattened_numpy` before use.
    confidence : float, default=0.99
        Confidence level forwarded to `scipy.stats.t.interval`.

    Returns
    -------
    interval : tuple
        The result of `stats.t.interval`: the lower and upper end-points of
        the interval, located at the sample mean and scaled by the standard
        error of the mean. The mean itself is not part of the return value.
    """
    sample = as_flattened_numpy(data)
    # degrees of freedom for the t-distribution
    dof = sample.shape[0] - 1
    centre = np.mean(sample)
    std_error = stats.sem(sample)
    return stats.t.interval(confidence, dof, loc=centre, scale=std_error)
def auto_fit(data, option="slim"):
    """Fit a set of candidate distributions to `data` and score each fit.

    Currently only compatible with continuous distributions with a *fit*
    method.

    Parameters
    ----------
    data : np.ndarray (n,)
        The sample to fit. If multi-dimensional, it is flattened to 1D.
    option : str/tuple of str, default="slim"
        Selects the candidate distributions via `_get_distribution_set`.
        Choose between "slim" and "full"; slim only compares the most
        popular distributions with fewer parameters.

    Returns
    -------
    df : DataFrame
        Results dataframe of each fitted model, as produced by
        `_get_qqplot_score_correlate`.
    """
    # arguments are evaluated left-to-right: flatten first, then resolve
    # the distribution set, then score every candidate.
    return _get_qqplot_score_correlate(
        as_flattened_numpy(data),
        _get_distribution_set(option),
    )
def univariate_kde(
    X: np.ndarray,
    bins: Optional[np.ndarray] = None,
    kde_name: str = "norm",
    kde_range: float = 1e-3,
    smoothen_kde: bool = True,
    verbose: int = 0,
    return_dist: bool = False,
):
    """Approximate the density of a 1D sample, continuous or discrete.

    .. note:: If X is multi-dimensional, the array is flattened.

    Parameters
    ----------
    X : np.ndarray
        The 1D sample. If more than 1-dim it is flattened.
    bins : np.ndarray, optional
        Bin positions; only consulted for discrete distributions.
        If None, they are computed with `get_bins`.
    kde_name : str, optional, default='norm'
        The name relating to a scipy.stats.<kde_name> distribution.
        If 'freeform': uses `scipy.stats.gaussian_kde` on the data points.
    kde_range : float, default=1e-3
        For continuous distributions the curve is evaluated between the
        `kde_range` and `1 - kde_range` quantiles of the fitted model.
    smoothen_kde : bool, default=True
        For discrete distributions, whether to smooth the curve via
        `_smooth_kde` instead of evaluating the pmf at `bins`.
    verbose : int, optional, default=0
        Forwarded to `fit_model`.
    return_dist : bool, default=False
        If True, the fitted model is returned as a third element.

    Returns
    -------
    x_kde : np.ndarray
        The x-coordinates of the KDE
    y_kde : np.ndarray
        The density (or mass) evaluated at `x_kde`
    model : fitted model (optional)
        Only returned when `return_dist` is True.
    """
    discrete_names = [*_get_discrete_single(), *_get_discrete_multiple()]

    sample = as_flattened_numpy(X)
    if bins is None:
        bins = get_bins(sample)

    if kde_name == "freeform":
        # non-parametric estimate over the observed data range
        fitted = stats.gaussian_kde(sample)
        x_kde = np.linspace(sample.min(), sample.max(), 200)
        y_kde = fitted.pdf(x_kde)
    else:
        fitted, _ = fit_model(sample, kde_name, verbose=verbose,
                              return_params=True)
        if kde_name not in discrete_names:
            # continuous: evaluate between the two tail quantiles
            lower = fitted.ppf(kde_range)
            upper = fitted.ppf(1 - kde_range)
            x_kde = np.linspace(lower, upper, 200)
            y_kde = fitted.pdf(x_kde)
        elif smoothen_kde:
            x_kde, y_kde = _smooth_kde(bins, fitted)
        else:
            x_kde, y_kde = bins, fitted.pmf(bins)

    if return_dist:
        return x_kde, y_kde, fitted
    return x_kde, y_kde
def bibox1d(X: _ArrayLike,
            Y: _ArrayLike,
            colors: Optional[_ListLike] = None,
            labels: Optional[_ListLike] = None,
            measured: Optional[str] = None,
            ax: Optional[mpl.axes.Axes] = None,
            mannwhitney: bool = True,
            with_strip: bool = False,
            vertical: bool = True,
            notch: bool = False,
            capsize: float = 1.0,
            outliers: bool = True,
            grid: bool = True,
            width: Union[float, List[float]] = 0.7,
            label_rotation: float = 0.0,
            label_max_length: int = 25,
            spines: Optional[_ListLike] = None,
            strip_jitter: float = 0.15,
            theme: str = "white_circle",
            **plot_kwargs):
    """Plots two 1-dimensional boxplots using vectors `X`, `Y`.

    Parameters
    ----------
    X : list/tuple/np.ndarray/pd.Series (1d)
        The first data column to draw. Must be numeric.
    Y : list/tuple/np.ndarray/pd.Series (1d)
        The second data column to draw. Must be numeric.
    colors : list of str, optional
        If None, uses a default color
    labels : list of str, optional
        Tick labels for the two boxes. If None, the `name` of `X`/`Y` is
        used when they are pandas.Series, otherwise an empty string.
    measured : str, optional
        A label to define what the measurement is
    ax : matplotlib.ax object, optional, default=None
        If None, creates a plot.
    mannwhitney : bool, default=True
        If True, performs a two-sided Mann-Whitney U test between the values
        and annotates the significance above the boxes
    with_strip : bool, default=False
        If True, draws a stripplot over the top of the boxplot, in a similar
        colour. `outliers` are set to False in this case
    vertical : bool, default=True
        Determines whether to draw the plot vertically or horizontally
    notch : bool, default=False
        Determines whether to draw a notched plot
    capsize : float, default=1.0
        Defines the length of the caps
    outliers : bool, default=True
        If True, displays fliers as outliers
    grid : bool, default=True
        If True: draws gridlines for the numeric axis
    width : float, default=0.7
        Determines the width/height of the box
    label_rotation : float, default=0
        The degrees of rotation to the ticklabels
    label_max_length : int, default=25
        If any label exceeds this length, it truncates it
    spines : list, optional
        Defines which spines are to be visible. If None, all four spines are
        shown, except the one on the side of the Mann-Whitney annotation
        when `mannwhitney` is True.
    strip_jitter : float, default=0.15
        With stripplot, defines the amount of jitter in the variables
    theme : str, default="white_circle"
        Name of the flier style, resolved by `_get_flier_style`

    Other Parameters
    ----------------
    plot_kwargs : dict
        keyword arguments to pass to `ax.boxplot`

    Returns
    -------
    ax : matplotlib.ax object
        Allows further modifications to the axes post-boxplot

    See Also
    --------
    matplotlib.pyplot.boxplot

    References
    ----------
    Inspiration from https://github.com/jbmouret/matplotlib_for_papers#colored-boxes
    """
    instance_check((X, Y), (list, tuple, np.ndarray, pd.Series))
    instance_check((colors, labels, spines), (type(None), list, pd.Index))
    instance_check(ax, (type(None), mpl.axes.Axes))
    instance_check((mannwhitney, vertical, notch, outliers, grid, with_strip),
                   bool)
    instance_check((capsize, width, strip_jitter, label_rotation),
                   (float, int))
    instance_check(theme, str)
    instance_check(label_max_length, int)
    bounds_check(strip_jitter, 0.0, 1.0)

    _X = as_flattened_numpy(X)
    _Y = as_flattened_numpy(Y)
    _style = _get_flier_style(theme)

    if ax is None:
        # tall figure for vertical boxes, wide figure for horizontal ones
        _, ax = plt.subplots(figsize=(3.5, 7) if vertical else (7, 3.5))

    if with_strip:
        # the strip already shows every point, so fliers would be redundant
        outliers = False

    if spines is None:
        if not mannwhitney:
            spines = ("bottom", "left", "top", "right")
        elif vertical:
            # leave the top free for the significance bracket
            spines = ("bottom", "left", "right")
        else:
            # leave the right free for the significance bracket
            spines = ("bottom", "left", "top")

    # sort out labels
    if labels is None:
        labels = [
            X.name if isinstance(X, pd.Series) else "",
            Y.name if isinstance(Y, pd.Series) else "",
        ]

    box_alpha = 0.5 if with_strip else 1.0

    patch_obj = ax.boxplot([_X, _Y],
                           vert=vertical,
                           patch_artist=True,
                           showfliers=outliers,
                           notch=notch,
                           widths=width,
                           flierprops=_style,
                           boxprops=dict(alpha=box_alpha),
                           **plot_kwargs)
    # define boxplot extras
    _define_boxplot_arguments(ax, patch_obj, vertical, measured, grid, spines,
                              capsize, None)
    # define basic colours - overrides if needs be
    colors = _kcolor_arrangement(patch_obj, colors)
    # label axes
    _label_axes(ax, labels, vertical, label_rotation, label_max_length)

    # if we have stripplot, draw one strip per box
    if with_strip:
        _overlay_stripplot(_X, ax, 1, width, colors[0], vertical, outliers,
                           strip_jitter)
        _overlay_stripplot(_Y, ax, 2, width, colors[1], vertical, outliers,
                           strip_jitter)

    # if we have mann-whitney append this info
    if mannwhitney:
        _annotate_mannwhitney(ax, _X, _Y, vertical)

    return ax


def _annotate_mannwhitney(ax, x, y, vertical):
    """Draw a bracket between boxes 1 and 2 labelled with the test result."""
    # Ask for the two-sided p-value explicitly rather than doubling the
    # default result: on SciPy versions whose default is already two-sided,
    # doubling would inflate p (possibly beyond 1).
    _, p = mannwhitneyu(x, y, alternative="two-sided")
    star = _get_stars(p)

    # get dimensions to annotate
    joined = np.concatenate((x, y))
    _max, _min = np.max(joined), np.min(joined)
    # the text sits 10% of the data range beyond the largest value
    text_pos = _max + np.abs(_max - _min) * 0.1

    if vertical:
        bar_from, bar_to = (1, _max), (2, _max)
        text_x, text_y = 1.5, text_pos
    else:
        bar_from, bar_to = (_max, 2), (_max, 1)
        text_x, text_y = text_pos, 1.5

    ax.annotate(
        "",
        xy=bar_from,
        xycoords="data",
        xytext=bar_to,
        textcoords="data",
        arrowprops=dict(arrowstyle="-",
                        ec="#666666",
                        connectionstyle="bar,fraction=0.2"),
    )
    # add mw text
    ax.text(
        text_x,
        text_y,
        star,
        horizontalalignment="center",
        verticalalignment="center",
    )
def box1d(X: _ArrayLike,
          color: Optional[str] = None,
          label: Optional[str] = None,
          ax: Optional[mpl.axes.Axes] = None,
          with_strip: bool = False,
          vertical: bool = True,
          notch: bool = False,
          capsize: float = 1.0,
          outliers: bool = True,
          axis_scale: Optional[Union[str, Callable]] = None,
          grid: bool = True,
          width: float = 0.7,
          label_rotation: float = 0.0,
          label_max_length: int = 25,
          spines: Optional[_ListLike] = None,
          theme: str = "white_circle",
          **plot_kwargs):
    """Draws a single boxplot from a 1-dimensional vector.

    Parameters
    ----------
    X : list/tuple/np.ndarray/pd.Series (1d)
        The data column to draw. Must be numeric.
    color : str, optional
        If None, uses a default color
    label : str, optional
        Axis label for the box. Ignored when X is a pandas.Series, in which
        case the name of the Series is used instead.
    ax : matplotlib.ax object, optional, default=None
        If None, creates a plot.
    with_strip : bool, default=False
        If True, draws a stripplot over the top of the boxplot, in a similar
        colour. `outliers` are set to False in this case
    vertical : bool, default=True
        Determines whether to draw the plot vertically or horizontally
    notch : bool, default=False
        Determines whether to draw a notched plot
    capsize : float, default=1.0
        Defines the length of the caps
    outliers : bool, default=True
        If True, displays fliers as outliers
    axis_scale : str/callable, optional
        Scales the data along the axis before plotting.
        If str, use {'log', 'sqrt', 'log2'}
        If callable, must reference a `np.*` function which takes array X
        and returns X'
    grid : bool, default=True
        If True: draws gridlines for the numeric axis
    width : float, default=0.7
        Determines the width/height of the box
    label_rotation : float, default=0
        The degrees of rotation to the ticklabels
    label_max_length : int, default=25
        If any label exceeds this length, it truncates it
    spines : list, optional
        Defines which spines are to be visible; all four when None
    theme : str, default="white_circle"
        Name of the flier style, resolved by `_get_flier_style`

    Other Parameters
    ----------------
    plot_kwargs : dict
        keyword arguments to pass to `ax.boxplot`

    Returns
    -------
    ax : matplotlib.ax object
        Allows further modifications to the axes post-boxplot
    """
    # argument validation
    instance_check(X, (np.ndarray, pd.Series, list, tuple))
    instance_check((vertical, notch, outliers, grid, with_strip), bool)
    instance_check(spines, (type(None), list))
    instance_check(theme, str)
    instance_check((label, color), (type(None), str))
    instance_check((capsize, width), float)
    instance_check(label_rotation, (int, float))
    instance_check(label_max_length, int)
    bounds_check(width, 0.0, 1.0)

    values = as_flattened_numpy(X)
    flier_style = _get_flier_style(theme)

    # rescale the data up-front when a scale is requested
    if axis_scale:
        values = _convert_x_scale(values, axis_scale)

    # a strip shows every point already, so fliers are switched off
    if with_strip:
        outliers = False

    if ax is None:
        figure_size = (2.5, 5) if vertical else (5, 2.5)
        _, ax = plt.subplots(figsize=figure_size)

    if spines is None:
        spines = ("left", "top", "right", "bottom")

    # fade the box when points are drawn on top of it
    alpha = 0.5 if with_strip else 1.0

    patches = ax.boxplot(
        values,
        vert=vertical,
        notch=notch,
        widths=width,
        showfliers=outliers,
        patch_artist=True,
        flierprops=flier_style,
        boxprops=dict(alpha=alpha),
        **plot_kwargs
    )

    # caps, grid, spines and scale
    _define_boxplot_arguments(ax, patches, vertical, None, grid, spines,
                              capsize, axis_scale)
    # colouring of the patch
    color = _color_arrangement(ax, patches, color)

    # a Series name takes priority over the `label` argument
    if isinstance(X, pd.Series):
        axis_label = X.name
    else:
        axis_label = label
    _label_axes(ax, axis_label, vertical, label_rotation, label_max_length)

    if with_strip:
        _overlay_stripplot(values, ax, 1, width, color, vertical, outliers,
                           strip_jitter=0.15)

    return ax
def histogram(X: _ArrayLike,
              kde: str = "freeform",
              bins: Optional[Union[int, _ListLike]] = None,
              density: bool = True,
              stat: bool = False,
              ax: Optional[mpl.axes.Axes] = None,
              x_label: str = "",
              title: str = "",
              kde_range: float = 1e-3,
              smoothen_kde: bool = True,
              verbose: int = 0,
              *hist_args,
              **hist_kwargs) -> mpl.axes.Axes:
    """Draws pretty histograms using `X`.

    Parameters
    ----------
    X : list/tuple/np.ndarray/pd.Series (1d)
        The data column to draw. Must be numeric.
    kde : str/tuple of str, optional, default="freeform"
        If None, does not draw a KDE plot
        If 'freeform': fits the best KDE to the points
        If 'auto': attempts to fit the best `continuous` distribution
        If list/tuple: uses 'auto' to fit the best distribution out of
        options
        else, choose from available distributions in `scipy.stats`
    bins : int, optional
        If None, the bins are determined by `get_bins`
    density : bool, default=True
        If True, uses density approximation. Forced to True whenever a KDE
        is drawn.
    stat : bool, default=False
        If True, sets statistical variables (mean, sd, skew, kurtosis) in
        the legend
    ax : matplotlib.ax object, optional, default=None
        If None, creates one.
    x_label : str, optional, default=""
        If empty, a label is generated by `_assign_x_label`.
    title : str, optional, default=""
        Title set on the axes.
    kde_range : float, default=1e-3
        Defines the precision on the KDE range if plotted, i.e. the curve
        spans the quantiles (kde_range, 1 - kde_range). Must be > 0.
    smoothen_kde : bool, default=True
        If discrete-distribution, applies smoothing function to KDE if True
    verbose : int, default=0
        If > 0, prints out useful messages

    Other Parameters
    ----------------
    args : list
        Arguments to pass to `ax.hist`
    kwargs : dict
        Keyword arguments to pass to `ax.hist`

    Returns
    -------
    ax : matplotlib.ax object
        Allows further modifications to the axes post-histogram

    Raises
    ------
    ValueError
        If `kde` is a string that is neither 'auto', 'freeform' nor the name
        of an attribute of `scipy.stats`.
    """
    instance_check(X, (np.ndarray, pd.Series, list, tuple))
    instance_check((density, stat, smoothen_kde), bool)
    instance_check((title, x_label), str)
    instance_check(kde, (str, type(None), list, tuple))
    instance_check(kde_range, float)
    bounds_check(verbose, 0, 4)

    # convert to numpy.
    _X = as_flattened_numpy(X)
    # make bins if set to None
    if bins is None:
        bins = get_bins(_X)

    # a density curve only lines up with a density-normalised histogram
    if kde:
        density = True

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 5))

    # plot the histogram, optionally with summary statistics in the legend
    if stat:
        stat_label = "mean: {:0.2f}, sd: {:0.3f},\n skew: {:0.3f} kurt: {:0.3f}".format(
            np.nanmean(_X), np.nanstd(_X), stats.skew(_X), stats.kurtosis(_X))
        _plot_hist(_X,
                   ax,
                   *hist_args,
                   bins=bins,
                   density=density,
                   rwidth=0.9,
                   label=stat_label,
                   **hist_kwargs)
        ax.legend(loc="best")
    else:
        _plot_hist(_X,
                   ax,
                   *hist_args,
                   bins=bins,
                   density=density,
                   rwidth=0.9,
                   **hist_kwargs)

    ax.set_title(title)
    ax.set_ylabel("Density" if density else "Counts")

    auto_fitted = None
    model = None

    if kde is not None:
        if kde == "auto" or isinstance(kde, (list, tuple)):
            # score the candidate distributions and keep the best-correlated
            auto_fitted = auto_fit(_X, kde)
            best_model_ = auto_fitted.loc[auto_fitted["r"].idxmax()]
            kde_name = best_model_.name
        elif (kde == "freeform") or hasattr(stats, kde):
            kde_name = kde
        else:
            raise ValueError(
                "kde value '{}' not found in scipy.stats".format(kde))

        # honour the caller's kde_range instead of a hard-coded constant
        x_kde, y_kde, model = univariate_kde(
            _X,
            bins,
            kde_name,
            kde_range=kde_range,
            smoothen_kde=smoothen_kde,
            verbose=verbose,
            return_dist=True,
        )
        ax.plot(x_kde, y_kde, "-", color="r")

    if x_label == "":
        x_label = _assign_x_label(
            title,
            X.name if isinstance(X, pd.Series) else "",
            kde is not None,
            auto_fitted,
            # the freeform model has no named parameters to report
            model if not kde == "freeform" else None,
        )

    ax.set_xlabel(x_label)
    return ax
def scatter_slim(X: _ArrayLike,
                 Y: _ArrayLike,
                 bins: Optional[int] = None,
                 threshold: Union[int, float] = 50,
                 **turbo_kws):
    """Generates a slimmed-down scatterplot.

    This is useful where there are thousands of points overlapping, and for
    visualization and storage size, you only plot so many points within a
    given bin area.

    Parameters
    ----------
    X : list/tuple/np.ndarray/pd.Series (1d)
        The data column to draw on the x-axis. Flattens if np.ndarray
    Y : list/tuple/np.ndarray/pd.Series (1d)
        The data column to draw on the y-axis. Flattens if np.ndarray
    bins : int, optional
        Number of bins used along each of the X and Y domains. If None, it
        is the integer average of `freedman_diaconis_bins` for X and Y.
    threshold : int or float
        If int, the maximum number of samples kept in each bin.
        If float, the proportion of points [0..1] to keep in each bin.
        Any other type keeps every point.
    turbo_kws : dict
        Keyword arguments forwarded to `scatter`, which routes anything it
        does not name itself on to `ax.scatter`.

    Returns
    -------
    ax : matplotlib.ax object
        Allows further modifications to the axes post-scatter
    """
    _X = as_flattened_numpy(X)
    _Y = as_flattened_numpy(Y)
    # keep only the pairs where both values are present
    _X, _Y = remove_na(_X, _Y, paired=True)

    if bins is None:
        bins_x = freedman_diaconis_bins(_X)
        bins_y = freedman_diaconis_bins(_Y)
        # take the average, integer division
        bins = (bins_x + bins_y) // 2
    else:
        # ensure it is non-negative
        nonnegative(bins, int)

    # only the bin edges are needed; the counts are recomputed per bin below
    _, xs, ys = np.histogram2d(_X, _Y, bins=bins)
    last = bins - 1

    # isinstance also accepts numpy scalars; bool is excluded so that it is
    # not mistaken for a count of 0/1
    as_count = (isinstance(threshold, (int, np.integer))
                and not isinstance(threshold, bool))
    as_fraction = isinstance(threshold, (float, np.floating))

    indices = []
    # loop through all the bins and compute a valid sample subset
    for i in range(bins):
        # The final bin is closed on the right (as in np.histogram2d) so that
        # points sitting exactly on the maximum edge are not dropped.
        if i == last:
            in_x = np.logical_and(_X >= xs[i], _X <= xs[i + 1])
        else:
            in_x = np.logical_and(_X >= xs[i], _X < xs[i + 1])

        for j in range(bins):
            if j == last:
                in_y = np.logical_and(_Y >= ys[j], _Y <= ys[j + 1])
            else:
                in_y = np.logical_and(_Y >= ys[j], _Y < ys[j + 1])

            i_b = np.flatnonzero(np.logical_and(in_x, in_y))
            i_bn = i_b.shape[0]
            if i_bn == 0:
                continue

            samp_size = i_bn
            if as_count:
                samp_size = min(i_bn, threshold)
            elif as_fraction:
                samp_size = min(i_bn, int(i_bn * threshold))

            indices.append(np.random.choice(i_b, samp_size, replace=False))

    # np.hstack refuses an empty list, so fall back to an empty selection
    ni = np.hstack(indices) if indices else np.empty(0, dtype=int)
    # x and y is now selected using ni
    return scatter(_X[ni], _Y[ni], **turbo_kws)
def scatter(X: _ArrayLike,
            Y: _ArrayLike,
            c: Union[str, _ArrayLike] = "k",
            marker: Union[str, _ArrayLike] = "o",
            s: Optional[Union[_Numeric, _ArrayLike]] = None,
            dense: bool = False,
            fit_line: bool = False,
            ax: Optional[mpl.axes.Axes] = None,
            alpha: Optional[float] = None,
            cmap: str = "viridis",
            legend: bool = True,
            colorbar: bool = True,
            with_jitter: bool = False,
            x_label: Optional[str] = None,
            y_label: Optional[str] = None,
            x_scale: str = "linear",
            y_scale: str = "linear",
            legend_outside: bool = False,
            title: str = "",
            with_grid: bool = False,
            fit_line_degree: int = 1,
            **scatter_kws):
    """Generates a scatterplot, with some useful features added to it.

    Parameters
    ----------
    X : list/tuple/np.ndarray/pd.Series (1d)
        The data column to draw on the x-axis. Flattens if np.ndarray
    Y : list/tuple/np.ndarray/pd.Series (1d)
        The data column to draw on the y-axis. Flattens if np.ndarray
    c : str/list/tuple/np.ndarray/pd.Series (1d), default='k'
        The colour of the points.
        If array, colors must be a categorical/valid float type, uses cmap
    marker : str/list/tuple/np.ndarray/pd.Series (1d), default='o'
        The marker style of the points.
        If type=list/array, array must be a categorical/str-like type to map
        to matplotlib markers
        If dense=True, treats each marker as a circle, ignores this input
    s : int/float/list/tuple/np.ndarray/pd.Series (1d), optional
        Size of each point.
        If dense=True, this value is set automatically.
        If type=list/array, array must be array of floats
    dense : bool
        If True, draws the uniform densities instead of the actual points
    fit_line : bool
        If True, draws a line of best fit on the data
    ax : matplotlib.ax.Axes, optional, default=None
        If None, creates one.
    alpha : float, optional
        Sets the alpha for colour. If dense is True, this value is set
        automatically
    cmap : str, default="viridis"
        The default colormap for continuous-valued c.
    legend : bool, default=True
        Draws a legend if the 'c' variable is discrete
    colorbar : bool, default=True
        Draws a colorbar if the 'c' variable is continuous
    with_jitter : bool, default=False
        If True, and dense=True, adds some jitter to the uniform points
    x_label : str, optional
        Label for the x-axis. If None and X is a pandas.Series, the name of
        the Series is used.
    y_label : str, optional
        Label for the y-axis. If None and Y is a pandas.Series, the name of
        the Series is used.
    x_scale : str, default="linear"
        Choose from {'linear', 'log', 'symlog', 'logit'}, see
        `matplotlib.ax.set_xscale`
    y_scale : str, default="linear"
        Choose from {'linear', 'log', 'symlog', 'logit'}, see
        `matplotlib.ax.set_yscale`
    legend_outside : bool, default=False
        If True, plots the legend outside the plot at (1, 1)
    title : str, default=""
        Optional title at the top of the axes
    with_grid : bool, default=False
        If True, draws a grid
    fit_line_degree : int, default=1
        If fit_line=True, Determines the degree to which a line is fitted to
        the data, allows polynomials

    Other Parameters
    ----------------
    scatter_kws : dict
        Keyword arguments to pass to `ax.scatter`

    Returns
    -------
    ax : matplotlib.ax object
        Allows further modifications to the axes post-scatter
    """
    instance_check((X, Y), (list, tuple, np.ndarray, Series))
    instance_check((c, marker), (str, list, tuple, np.ndarray, Series, Index))
    instance_check(
        s, (type(None), int, float, list, tuple, np.ndarray, Series, Index))
    instance_check(alpha, (type(None), float))
    instance_check(ax, (type(None), mpl.axes.Axes))
    instance_check(
        (dense, with_jitter, fit_line, with_grid, legend, legend_outside),
        bool)
    instance_check((x_label, y_label, title, x_scale, y_scale),
                   (type(None), str))
    instance_check(fit_line_degree, int)
    arrays_equal_size(X, Y)
    if isinstance(marker, str):
        belongs(marker, _marker_set())

    # get subset where missing values from either are dropped
    _X = as_flattened_numpy(X)
    _Y = as_flattened_numpy(Y)
    # remove values not found in both.
    _X, _Y = remove_na(_X, _Y, paired=True)

    # warn the user if n is large to maybe consider dense option?
    if _X.shape[0] > 15000 and not dense:
        # report the size of the flattened array: X itself may be a
        # list/tuple, which has no `.shape`
        warn(
            "Data input n={} is large, consider setting dense=True or using function `scatter_slim`."
            .format(_X.shape[0]),
            UserWarning,
        )

    # per-point sizes and markers must line up with the data
    if isinstance(s, (list, tuple)) and not dense:
        s = as_flattened_numpy(s)
        arrays_equal_size(X, Y, s)
    if isinstance(marker, (list, tuple)) and not dense:
        marker = np.asarray(marker)
        arrays_equal_size(X, Y, marker)

    # reconfigure colors if they are given per point
    if not isinstance(c, str):
        palette, _cmode = cat_array_to_color(c, cmap=cmap)
        # perform size check
        arrays_equal_size(X, Y, palette)
    else:
        palette = c
        _cmode = "static"

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 5))

    if dense:
        # alpha, s and marker are set in this case
        alpha = 0.8
        marker = "o"
        # perform density plotting, capped at 50 bins per axis
        bins_x = min(freedman_diaconis_bins(_X), 50)
        bins_y = min(freedman_diaconis_bins(_Y), 50)
        # estimate counts using histogram2d; the counts become point sizes
        s, xs, ys = np.histogram2d(_X, _Y, bins=(bins_x, bins_y))
        # NOTE(review): histogram2d counts are indexed [x_bin, y_bin] whereas
        # meshgrid with default indexing yields (n_ybins, n_xbins) arrays;
        # confirm `_draw_scatter` accounts for this orientation.
        xp, yp = np.meshgrid(xs[:-1], ys[:-1])
        if with_jitter:
            xp += np.random.rand(*xp.shape) / (_X.max() - _X.min())
            yp += np.random.rand(*yp.shape) / (_Y.max() - _Y.min())
    else:
        if alpha is None:
            alpha = _select_best_alpha(_X.shape[0])
        if s is None:
            s = _select_best_size(_X.shape[0])
        xp = _X
        yp = _Y

    # draw
    _ = _draw_scatter(xp,
                      yp,
                      palette,
                      s,
                      marker,
                      alpha,
                      ax,
                      cmap=cmap,
                      **scatter_kws)

    # optionally fit a line of best fit
    if fit_line:
        _draw_line_best_fit(_X, _Y, palette, ax, fit_line_degree)

    if with_grid:
        ax.grid()

    # associate legend if colour map is used
    if _cmode == "discrete" and legend:
        map_legend(c, palette, marker, ax, legend_outside)
    elif _cmode == "continuous" and colorbar:
        # add colorbar
        _make_colorbar(c, ax, cmap)

    # apply x-label, y-label, title; an explicit label beats a Series name
    if isinstance(x_label, str):
        ax.set_xlabel(x_label)
    elif isinstance(X, Series):
        ax.set_xlabel(X.name)

    if isinstance(y_label, str):
        ax.set_ylabel(y_label)
    elif isinstance(Y, Series):
        ax.set_ylabel(Y.name)

    ax.set_xscale(x_scale)
    ax.set_yscale(y_scale)
    ax.set_title(title)

    return ax
def bar1d(
    X: _ArrayLike,
    Y: Optional[_ListLike] = None,
    c: Optional[Union[_ArrayLike, str]] = "k",
    vert: bool = True,
    sort: bool = True,
    ax: Optional[mpl.axes.Axes] = None,
    scale: str = "linear",
    annotate: bool = False,
    legend: bool = False,
    width: float = 0.8,
    label_rotation: float = 0.0,
    value_label: Optional[str] = None,
    sort_by: str = "values",
    cmap: str = "Blues",
    linesAt: Optional[Union[_Numeric, _ListLike]] = None,
):
    """Draws a 1-dimensional barplot.

    Parameters
    ----------
    X : list, tuple, np.ndarray, pd.Series
        Categorical/string/time labels for the data.
        If pandas.Series (and Y is None), the index supplies the labels, the
        values supply the bar heights and the Series name replaces
        `value_label`.
        If any other type and Y is None, X supplies the bar heights and the
        labels are the positions 0..len(X)-1.
    Y : list, tuple, np.ndarray, optional
        The numeric values, with X as their labels.
    c : str/list/tuple/np.ndarray/pd.Series (1d), optional
        Defines the colour of each bar.
        If str, colours all of the bars with the same
        If array, must be a categorical type.
        If None, uses an automatic qualitative palette
    vert : bool, default=True
        Determines whether the plot is vertical or horizontal
    sort : bool, default=True
        Sorts the data or labels
    ax : matplotlib.ax.Axes, optional, default=None
        If None, creates one.
    scale : str, default="linear"
        Determines how to scale the numeric axis.
    annotate : bool, default=False
        Determines whether values should be annotated
    legend : bool, default=False
        Choose whether to display a legend
    width : float, default=0.8
        The width of each bar in the barplot
    label_rotation : float, default=0
        The degrees of rotation to the ticklabels
    value_label : str, optional
        Defines a name for the numerical axis
    sort_by : str, default="values"
        Defines how to sort the data if sort=True.
        Choose from {'values', 'labels'}
    cmap : str, default="Blues"
        Defines a colormap if color values are specified
    linesAt : int, float, list, tuple, optional
        If set, defines one or more lines to add to the barplot

    Returns
    -------
    ax : matplotlib.ax object
        Allows further modifications to the axes post-barplot
    """
    belongs(sort_by, ("values", "labels"))

    # define plot if not set
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 5))

    # work out labels, values and tick positions from the inputs
    if Y is not None:
        # X is labels, Y are numeric values (assume!)
        bar_labels = as_flattened_numpy(X)
        bar_values = as_flattened_numpy(Y)
        tick_pos = np.arange(bar_labels.shape[0])
    elif isinstance(X, pd.Series):
        # the Series carries everything: index -> labels, values -> heights
        bar_labels = as_flattened_numpy(X.index)
        bar_values = as_flattened_numpy(X.values)
        tick_pos = np.arange(X.shape[0])
        value_label = X.name
    else:
        # positions double as labels (both names refer to the same array)
        tick_pos = np.arange(len(X))
        bar_labels = tick_pos
        bar_values = as_flattened_numpy(X)

    # sort out colour
    palette = _determine_color_palette(c, tick_pos.shape[0], cmap)

    if sort:
        if sort_by == "values":
            sort_key = bar_values
        elif sort_by == "labels":
            sort_key = bar_labels
        else:
            raise ValueError(
                "sort_by '{}': must be ['values', 'labels']".format(sort_by)
            )
        ordering = np.argsort(sort_key)

        # a per-bar palette has to be reordered together with the data
        if isinstance(c, (type(None), str)):
            bar_labels, bar_values = _apply_data_sort(
                ordering, bar_labels, bar_values)
        else:
            bar_labels, bar_values, palette = _apply_data_sort(
                ordering, bar_labels, bar_values, palette)

    # plot the bar
    _plot_bar_orient(
        ax,
        tick_pos,
        bar_labels,
        bar_values,
        c=palette,
        w=width,
        vert=vert,
        lrot=label_rotation,
        annotate=annotate,
        lines=linesAt,
        vlabel=value_label,
    )

    # the numeric axis is y when vertical, x when horizontal
    set_numeric_scale = ax.set_yscale if vert else ax.set_xscale
    set_numeric_scale(scale)

    # map a legend to it
    if legend and not isinstance(c, str):
        map_legend(c, palette, "o", ax, False)

    return ax
def annotate(X: Union[np.ndarray, Series, List, Tuple],
             Y: Union[np.ndarray, Series, List, Tuple],
             T: Union[np.ndarray, Series, List, Tuple],
             subset: Optional[Union[np.ndarray, Series, List, Tuple]] = None,
             ax: Optional[mpl.axes.Axes] = None,
             word_shorten: Optional[int] = None,
             **annotate_kws):
    """Annotates a matplotlib plot with text.

    When X is floating-point, the text is nudged by 1/30 of the data range:
    to the right along x and downwards along y.

    Parameters
    ----------
    X : list/tuple/np.ndarray/pd.Series (1d)
        The x-positions of the text.
    Y : list/tuple/np.ndarray/pd.Series (1d)
        The y-positions of the text.
    T : list/tuple/np.ndarray/pd.Series (1d)
        The text array.
    subset : list/tuple/np.ndarray/pd.Series (1d)
        An array of indices to select a subset from.
    ax : matplotlib.ax.Axes, optional, default=None
        If None, creates one.
    word_shorten : int, optional
        If not None, shortens annotated strings to be more concise and
        displayable

    Other Parameters
    ----------------
    annotate_kws : dict
        Other keywords to pass to `ax.annotate`

    Returns
    -------
    ax : matplotlib.ax.Axes
        The same matplotlib plot, or the one generated
    """
    instance_check((X, Y), (list, tuple, np.ndarray, Series))
    instance_check(T, (list, tuple, np.ndarray, Series, Index))
    instance_check(subset,
                   (type(None), list, tuple, np.ndarray, Series, Index))
    instance_check(ax, (type(None), mpl.axes.Axes))
    arrays_equal_size(X, Y, T)

    # convert to numpy; copy so the in-place offset never touches the input
    _X = as_flattened_numpy(X).copy()
    _Y = as_flattened_numpy(Y).copy()
    _T = as_flattened_numpy(T)

    if word_shorten:
        _T = shorten(_T, newl=word_shorten)

    if _X.dtype.kind == "f":
        _X += (_X.max() - _X.min()) / 30.0
        # Out-of-place on purpose: an in-place `+=` of a float onto an
        # integer Y array raises a casting error, whereas this promotes the
        # result to float. For float Y the values are unchanged.
        _Y = _Y - (_Y.max() - _Y.min()) / 30.0

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 5))

    if subset is None:
        for x, y, t in it.zip_longest(_X, _Y, _T):
            ax.annotate(t, xy=(x, y), **annotate_kws)
    else:
        for i in subset:
            ax.annotate(_T[i], xy=(_X[i], _Y[i]), **annotate_kws)

    return ax