def boxplot(x, y, data=None, legend=None, marker='o', alpha=.5, points=True, cumprob=False, yscale='linear',
            cmap='default', figsize=(12, 6), orderby=None, table=True, fig=None, axes=None, cgrid=None,
            violin=False, **kwargs):
    """
    Draw a box plot (or violin plot) of ``y`` grouped by ``x``, optionally
    overlaid with jittered points, a cumulative-probability side plot, a
    legend and a data table.

    :param x: str or ndarray, grouping column (values are cast to ``str``)
    :param y: str or ndarray, value column (values are cast to ``float``)
    :param data: pd.DataFrame, source of data; when None a frame is built with
        ``components.create_df(x, y, legend)``
    :param legend: str or ndarray, color code points by this column
    :param marker: str, marker used for the point overlay
    :param alpha: float, alpha used for the point overlay and cumprob plot
    :param points: bool, draw the individual points on top of the boxes
    :param cumprob: bool, draw the cumulative-probability plot (skipped when
        ``axes`` is supplied)
    :param yscale: str, y-axis scale passed to ``set_yscale``
        (default ``'linear'``)
    :param cmap: str, colormap name handed to ``common.colors.colormap``
    :param figsize: tuple(int, int), size of the figure created when neither
        ``fig`` nor ``axes`` is supplied
    :param orderby: str, column used to position the boxes: the first
        ``orderby`` value of each ``x`` group is ranked and the rank becomes
        the box position
    :param table: bool, draw the data table (skipped when ``axes`` is supplied)
    :param fig: matplotlib figure to reuse; ignored when ``axes`` is supplied
    :param axes: matplotlib axes; when given, the plot is drawn on it, no
        legend / cumprob / table axes are used, and the axes is returned
    :param cgrid: pre-computed color grid; when None and ``legend`` is set it
        is built with ``common.colors.colormap``
    :param violin: bool, draw a violin plot instead of a box plot
    :param kwargs: forwarded to both ``DataFrame.boxplot`` and the
        ``axes.scatter`` call that draws the points
    :return: the supplied ``axes`` if one was given, otherwise the matplotlib
        figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    # the assignment realigns on the index, so rows whose y is NaN stay in df
    df[y] = df[y].astype('float').dropna()

    # TODO: this doesn't really work right
    # NOTE(review): groups whose first ``orderby`` value ties resolve to the
    # same position, because list.index returns the first match.
    if orderby:
        temp = df.sort_values(x)
        t = temp.groupby(x)[orderby]
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            g = t.get_group(mg).reset_index()
            map_of_x[mg] = g[orderby][0]

        list_to_order = sorted(map_of_x.values())
        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob, legend, table, fig=fig)

    if violin:
        array = [df[df[x] == arr][y] for arr in sorted(set(df[x]))]
        axm.violinplot(array, showmedians=True)
    elif orderby:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False, positions=order, fontsize=8, **kwargs)
    else:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False, fontsize=8, **kwargs)

    # We need to identify all of the unique entries in the groupby column.
    # NOTE(review): this is a substring test, so any label that merely
    # contains "nan" is skipped as well - confirm that is intended.
    nonan_grps = [group for group in set(df[x]) if 'nan' not in group]

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

    # add all the point level data
    groups = sorted(nonan_grps)
    for j, val in enumerate(groups):
        ys = df[y][df[x] == val]
        if orderby:
            pos = x_to_loc[val]
            xs = np.random.normal(pos, 0.05, size=len(ys))
        else:
            # create the jitters for the points
            xs = np.random.normal(j + 1, 0.05, size=len(ys))

        if points:
            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[df[x] == val]
                axm.scatter(xs, ys.values, color=cs, marker=marker, alpha=alpha, linewidths=1, **kwargs)
            else:
                axm.scatter(xs, ys.values, marker=marker, alpha=alpha, linewidths=1, **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[df[x] == val]
                axc = components.cumprob(ys, axc, color=cs, alpha=alpha, swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formatting
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)

    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)
        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')
    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
# NOTE(review): this module defines ``cumprob`` twice; the later definition is
# the one left bound to the name.
def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear', yscale='linear', cmap='default',
            alpha=0.5, marker='.', table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Draw a cumulative-probability plot of ``x``, optionally split and colored
    by ``legend``.

    :param x: str or ndarray
    :param data: if x is a str, this is a pd.DataFrame; when None a frame is
        built with ``components.create_df(x, None, legend)``
    :param legend: str or ndarray, one curve is drawn per distinct value
    :param figsize: default is (12, 6); size of the figure created when
        neither ``fig`` nor ``axes`` is supplied
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the
        graph (skipped when ``axes`` is supplied)
    :param fig: matplotlib figure if you want to reuse the figure; ignored
        when ``axes`` is supplied
    :param axes: matplotlib axes; when given, the plot is drawn on it and the
        axes is returned
    :param cgrid: pre-computed color grid; when None and ``legend`` is set it
        is built with ``common.colors.colormap``
    :param kwargs: accepted but not used by this function
    :return: the supplied ``axes`` if one was given, otherwise the matplotlib
        figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # the assignment realigns on the index, so rows whose x is NaN stay in df
    df[x] = df[x].astype('float').dropna()

    min_, max_ = np.min(df[x]), np.max(df[x])

    # a supplied axes takes precedence over a supplied figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # no legend axis exists when the caller supplied the axes
        if not axes:
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm, color=legend_color[group],
                                     marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
# NOTE(review): ``cumprob`` is also defined earlier in this module; this later
# definition is the one left bound to the name.
def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear', yscale='linear', cmap='default',
            alpha=0.5, marker='.', table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Draw a cumulative-probability plot of ``x``, optionally split and colored
    by ``legend``.

    :param x: str or ndarray
    :param data: if x is a str, this is a pd.DataFrame; when None a frame is
        built with ``components.create_df(x, None, legend)``
    :param legend: str or ndarray, one curve is drawn per distinct value
    :param figsize: default is (12, 6); size of the figure created when
        neither ``fig`` nor ``axes`` is supplied
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the
        graph (skipped when ``axes`` is supplied)
    :param fig: matplotlib figure if you want to reuse the figure; ignored
        when ``axes`` is supplied
    :param axes: matplotlib axes; when given, the plot is drawn on it and the
        axes is returned
    :param cgrid: pre-computed color grid; when None and ``legend`` is set it
        is built with ``common.colors.colormap``
    :param kwargs: accepted but not used by this function
    :return: the supplied ``axes`` if one was given, otherwise the matplotlib
        figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # the assignment realigns on the index, so rows whose x is NaN stay in df
    df[x] = df[x].astype('float').dropna()

    min_, max_ = np.min(df[x]), np.max(df[x])

    # a supplied axes takes precedence over a supplied figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # no legend axis exists when the caller supplied the axes
        if not axes:
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm, color=legend_color[group],
                                     marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
def scatter(x, y, data=None, legend=None, marker='o', alpha=.5, xscale='linear', yscale='linear',
            cmap='default', figsize=(12, 6), fit=None, fitparams=None, table=True, fig=None, axes=None,
            cgrid=None, **kwargs):
    """
    Scatter plots with regression lines

    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pandas.DataFrame; when None a frame is built with
        ``components.create_df(x, y, legend)``
    :param legend: str or ndarray, color/fit by this column
    :param marker: matplotlib marker style
    :param alpha: float, matplotlib alpha
    :param xscale: default == linear, any of matplotlib scale types
    :param yscale: default == linear, any of matplotlib scale types
    :param cmap: colormap name handed to ``common.colors.colormap``
    :param figsize: default == (12, 6); size of the figure created when
        neither ``fig`` nor ``axes`` is supplied
    :param fit: [linear, quadratic, smooth, interpolate]
    :param fitparams: params to pass to fitting function
    :param table: show the regression table (only drawn for linear / quadratic
        fits and only when ``axes`` is not supplied)
    :param fig: matplotlib figure to reuse; ignored when ``axes`` is supplied
    :param axes: matplotlib axes; when given, the plot is drawn on it and the
        axes is returned
    :param cgrid: pre-computed color grid; when None and ``legend`` is set it
        is built with ``common.colors.colormap``
    :param kwargs: forwarded to ``axes.scatter``
    :return: the supplied ``axes`` if one was given, otherwise the matplotlib
        figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    if not fitparams:
        fitparams = {}

    df = data.copy()
    df = df[[i for i in (x, y, legend) if i]]

    # many of the fitting routines don't work with nan or non-sorted data.
    # NaN rows are dropped here; sorting is applied only to the frame handed
    # to the fitting routine, so the row order that cgrid is indexed by is
    # left untouched.
    df = df.dropna()
    df = df.reset_index()

    # fit axis is for the regression equations
    makefitaxis = fit in ('linear', 'quadratic')

    # a supplied axes takes precedence over a supplied figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(False, legend, table and makefitaxis, fig=fig)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # if the axis is supplied, we do not want to create a legend axis
        if not axes:
            components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        text = ''
        for group in sorted(set(df[legend])):
            t = df[df[legend] == group]
            axm.scatter(x=t[x], y=t[y], c=legend_color[group], marker=marker, alpha=alpha, **kwargs)

            if fit:
                xs, ys, fn = _get_fit(x, y, t.sort_values(x), fit, fitparams)
                axm.plot(xs, ys, c=legend_color[group])

                if makefitaxis and table:
                    text += '${}: {}$\n'.format(str(group).strip(), fn)

        if makefitaxis and table and not axes:
            components.regressiontable(text, axt, fig)
            axt.axis('off')
    else:
        axm.scatter(x=df[x], y=df[y], marker=marker, alpha=alpha, **kwargs)
        if fit:
            fit_df = df.sort_values(x).reset_index(drop=True)
            xs, ys, fn = _get_fit(x, y, fit_df, fit, fitparams)
            axm.plot(xs, ys)

            # there is no table axis when the caller supplied the axes
            if makefitaxis and table and not axes:
                components.regressiontable('{}'.format(fn), axt, fig)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)
    axm.set_xlabel(x)
    axm.set_ylabel(y)

    if axes:
        return axm

    return canvas.figure
def contour(x, y, z, data=None, marker=None, alpha=.5, xscale='linear', yscale='linear', cmap=None,
            ncontours=100, gridsize=100, colorbar=True, labels=False, figsize=(12, 6), filled=True, fig=None,
            axes=None, cgrid=None, axislabels=True, axisticks=True, **kwargs):
    """
    Create a contour plot from x, y, and z values

    The scattered (x, y, z) samples are interpolated onto a regular
    ``gridsize`` x ``gridsize`` grid spanning the data range and then
    contoured.

    :param x: str or ndarray
    :param y: str or ndarray
    :param z: str or ndarray, value that is contoured
    :param data: pandas.DataFrame; when None, x, y and z are taken as
        array-likes and a frame with columns 'x', 'y', 'z' is built
    :param marker: matplotlib marker; when set, the sample locations are
        drawn in black on top of the contours
    :param alpha: accepted but not used by this function
    :param xscale: x-axis scale passed to ``set_xscale``
    :param yscale: y-axis scale passed to ``set_yscale``
    :param cmap: colormap passed to ``contourf`` / ``contour``
    :param ncontours: number of contour levels
    :param gridsize: number of grid points along each axis
    :param colorbar: bool, add a colorbar (skipped when ``axes`` is supplied)
    :param labels: bool, label the contour lines via ``clabel``
    :param figsize: size of the figure created when neither ``fig`` nor
        ``axes`` is supplied
    :param filled: bool, filled contours (``contourf``) or lines (``contour``)
    :param fig: matplotlib figure to reuse; ignored when ``axes`` is supplied
    :param axes: matplotlib axes; when given, the plot is drawn on it and the
        axes is returned
    :param cgrid: accepted but not used by this function
    :param axislabels: bool, label the x and y axes
    :param axisticks: bool, when False both axes are hidden
    :param kwargs: forwarded to ``contourf`` / ``contour``
    :return: the supplied ``axes`` if one was given, otherwise the matplotlib
        figure; None if the gridding step raises ValueError
    """
    # if no dataframe is supplied, create one
    if data is None:
        # NOTE(review): built locally rather than via components.create_df;
        # whether that helper accepts a z column is not visible from here -
        # switch back to it if it does.
        import pandas as pd
        data = pd.DataFrame({'x': np.asarray(x), 'y': np.asarray(y), 'z': np.asarray(z)})
        x, y, z = 'x', 'y', 'z'

    df = data.copy()
    df = df[[i for i in (x, y, z) if i]]
    df = df.dropna()
    df = df.reset_index()

    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        # NOTE(review): the sibling plot functions call components.get_axes
        # here - confirm get_axes_1 is intended.
        axm, axc, axl, axt = components.get_axes_1(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, _, _, _ = components.create_axes(False, False, False, fig=fig)

    xi = np.linspace(np.min(df[x]), np.max(df[x]), gridsize)
    yi = np.linspace(np.min(df[y]), np.max(df[y]), gridsize)

    # NOTE(review): matplotlib.mlab.griddata was removed in Matplotlib 3.1; on
    # newer versions this lookup raises AttributeError, which is not caught
    # below.
    try:
        zi = mpl.mlab.griddata(df[x], df[y], df[z], xi, yi, interp='linear')
    except ValueError:
        # gridding failed: give up silently, the caller receives None
        return

    if filled:
        cf = axm.contourf(xi, yi, zi, ncontours, cmap=cmap, **kwargs)
    else:
        cf = axm.contour(xi, yi, zi, ncontours, cmap=cmap, **kwargs)

    if not axisticks:
        axm.get_xaxis().set_visible(False)
        axm.get_yaxis().set_visible(False)

    if marker:
        axm.scatter(df[x], df[y], marker=marker, color='k')

    # fig may be unset when the caller supplied the axes
    if colorbar and not axes:
        fig.colorbar(cf)

    if labels:
        axm.clabel(cf)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)

    if axislabels:
        axm.set_xlabel(x)
        axm.set_ylabel(y)

    if axes:
        return axm

    return canvas.figure