def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear',
            yscale='linear', cmap='default', alpha=0.5, marker='.',
            table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Cumulative probability plot of one column, optionally split and colored
    by a legend column.

    NOTE(review): a second ``cumprob`` with the same signature is defined
    later in this file; at import time the later definition replaces this one.

    :param x: str or ndarray
    :param data: if x is a str, this is a pd.Dataframe
    :param legend: str or ndarray, color/group by this column
    :param figsize: default is (12, 6); sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
    :param fig: matplotlib figure if you want to reuse the figure.
    :param axes: matplotlib axes; when given the plot is drawn on it, the
                 legend and table are skipped, and the axes is returned.
                 Takes precedence over ``fig``.
    :param cgrid: optional precomputed color grid, looked up by row position
    :param kwargs: accepted for signature compatibility; not used here
    :return: matplotlib figure, or the axes when ``axes`` is supplied
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # NOTE: the dropna() result is re-aligned to df on assignment, so rows
    # with NaN stay in df.
    df[x] = df[x].astype('float').dropna()
    min_, max_ = np.min(df[x]), np.max(df[x])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # the legend axis only exists when we own the figure
        if not axes:
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm,
                                     color=legend_color[group],
                                     marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)
    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm
    return canvas.figure
def boxplot(x, y, data=None, legend=None, marker='o', alpha=.5, points=True,
            cumprob=False, yscale='linear', cmap='default', figsize=(12, 6),
            orderby=None, table=True, fig=None, axes=None, cgrid=None,
            violin=False, **kwargs):
    """
    Boxplot function

    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pd.Dataframe, source of data
    :param legend: str or ndarray color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param table: bool, show or not show the datatable
    :param fig: matplotlib figure, if you want to re-use the figure, pass in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be created on that axes,
                 and other axes will not be created.
    :param cgrid: optional precomputed color grid, indexed like the rows of data
    :param violin: bool, draw a violin plot instead of boxes
    :param kwargs: forwarded to DataFrame.boxplot and to the point scatter call
    :return: matplotlib figure, or the axes when ``axes`` is supplied
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    df[y] = df[y].astype('float').dropna()

    # TODO: this doesn't really work right
    if orderby:
        # DataFrame.sort was removed from pandas; sort_values is the replacement
        temp = df.sort_values(x)
        t = temp.groupby(x)[orderby]
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            g = t.get_group(mg).reset_index()
            # the first orderby value of each group decides its position
            map_of_x[mg] = g[orderby][0]
        list_to_order = sorted(map_of_x.values())
        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob, legend, table, fig=fig)

    if violin:
        array = [df[df[x] == arr][y] for arr in sorted(set(df[x]))]
        axm.violinplot(array, showmedians=True)
    elif orderby:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False,
                   positions=order, fontsize=8, **kwargs)
    else:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False, fontsize=8, **kwargs)

    # We need to identify all of the unique entries in the groupby column.
    # x was cast to str above, so missing values appear as the literal 'nan'.
    # Compare for equality: a substring test would also drop real groups whose
    # name merely contains "nan" (e.g. "finance").
    groups = sorted(group for group in set(df[x]) if group != 'nan')

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

    # add all the point level data
    for j, val in enumerate(groups):
        in_group = df[x] == val
        ys = df[y][in_group]

        # create the jitters for the points around the box position
        center = x_to_loc[val] if orderby else j + 1
        xs = np.random.normal(center, 0.05, size=len(ys))

        if points:
            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[in_group]
                axm.scatter(xs, ys.values, color=cs, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)
            else:
                axm.scatter(xs, ys.values, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[in_group]
                axc = components.cumprob(ys, axc, color=cs, alpha=alpha, swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formatting
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)
        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear',
            yscale='linear', cmap='default', alpha=0.5, marker='.',
            table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Cumulative probability plot of one column, optionally split and colored
    by a legend column.

    NOTE(review): ``cumprob`` is also defined earlier in this file with the
    same signature; this later definition is the one that takes effect.

    :param x: str or ndarray
    :param data: if x is a str, this is a pd.Dataframe
    :param legend: str or ndarray, color/group by this column
    :param figsize: default is (12, 6); sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
    :param fig: matplotlib figure if you want to reuse the figure.
    :param axes: matplotlib axes; when given the plot is drawn on it, the
                 legend and table are skipped, and the axes is returned.
                 Takes precedence over ``fig``.
    :param cgrid: optional precomputed color grid, looked up by row position
    :param kwargs: accepted for signature compatibility; not used here
    :return: matplotlib figure, or the axes when ``axes`` is supplied
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # NOTE: the dropna() result is re-aligned to df on assignment, so rows
    # with NaN stay in df.
    df[x] = df[x].astype('float').dropna()
    min_, max_ = np.min(df[x]), np.max(df[x])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # the legend axis only exists when we own the figure
        if not axes:
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm,
                                     color=legend_color[group],
                                     marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)
    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm
    return canvas.figure
def grid(rows=None, cols=None, data=None, chart=None, args=None,
         figsize=(8, 8), legend=None, cmap='default', colorbar=False):
    """
    Create a grid of charts from pandas data, one cell per combination of
    the unique values found in the ``rows`` and ``cols`` columns.

    NOTE(review): a second ``grid`` with the same signature is defined later
    in this file; at import time the later definition replaces this one.

    :param rows: column name whose unique values become the grid rows
    :param cols: column name whose unique values become the grid columns
    :param data: pandas dataframe (required; ``data.copy()`` is called)
    :param chart: plotting function, called as
                  ``chart(data=..., axes=..., cgrid=..., **args)``
    :param args: argument dictionary to pass to the chart; None means no
                 extra arguments
    :param figsize: tuple to set figsize
    :param legend: color by this column
    :param cmap: matplotlib colormap to use
    :param colorbar: accepted but not used by this function
    :return: matplotlib figure
    """
    # args defaults to None; ** unpacking needs a mapping
    if args is None:
        args = {}

    df = data.copy()

    rows_array, cols_array = [], []
    # a missing/None column name simply leaves that direction un-faceted
    try:
        cols_array = sorted(set(df[cols]))
    except (KeyError, NameError, ValueError):
        pass
    try:
        rows_array = sorted(set(df[rows]))
    except (KeyError, NameError, ValueError):
        pass

    numcols = len(cols_array) if cols_array else 1
    numrows = len(rows_array) if rows_array else 1

    numcols += 1  # add a row and column for headings
    numrows += 1

    if legend:
        numcols += 1

    fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
    canvas = mbb.FigureCanvasAgg(fig)

    # narrow heading strips (ratio 1) next to wide chart cells (ratio 5)
    wrs = [1] + [5 for _ in cols_array] if cols_array else [1, 5]
    if legend:
        wrs += [1]
    hrs = [1] + [5 for _ in rows_array] if rows_array else [1, 5]

    spec = gs.GridSpec(numrows, numcols, width_ratios=wrs, height_ratios=hrs)

    # column headings along the top strip
    for i, val in enumerate(cols_array, start=1):
        ax = fig.add_subplot(spec[0, i])
        ax.text(.5, .3, val)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0), 1, 1, fill=True, transform=ax.transAxes,
                                  clip_on=True, fc='#C8C8C8')
        ax.add_patch(p)

    # row headings along the left strip
    for i, val in enumerate(rows_array, start=1):
        ax = fig.add_subplot(spec[i, 0])
        ax.text(.5, .5, val, rotation=90)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0), 1, 1, fill=True, transform=ax.transAxes,
                                  clip_on=True, fc='#C8C8C8')
        ax.add_patch(p)

    # if rows and columns are provided, we need all combinations
    # itertools product will return nothing if one of the cols/rows is None
    # so then we will default to the longest of the cols/rows
    charts = list(itertools.product(cols_array, rows_array))
    if not charts:
        try:
            zip_longest = itertools.zip_longest
        except AttributeError:  # py2
            zip_longest = itertools.izip_longest
        charts = list(zip_longest(cols_array, rows_array))

    if legend:
        cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

    for x, y in charts:
        # filter the data for the exact chart we are looking at.
        # zip_longest pads the missing direction with None, so test for None
        # explicitly: a truthiness test would skip real values like 0 or ''.
        mask = None
        if x is not None:
            mask = df[cols] == x
        if y is not None:
            row_mask = df[rows] == y
            mask = row_mask if mask is None else (mask & row_mask)

        tdf = df if mask is None else df[mask]
        if tdf.size == 0:
            continue

        # filter the color grid with the same mask so it matches the chart data
        tc = None
        if legend:
            tc = cgrid if mask is None else cgrid[mask]
            tc = tc.reset_index(drop=True)

        row_idx = rows_array.index(y) + 1 if y is not None else 1
        col_idx = cols_array.index(x) + 1 if x is not None else 1
        ax = fig.add_subplot(spec[row_idx, col_idx])

        # call the particular chart provided
        if legend:
            chart(data=tdf, axes=ax, cgrid=tc, legend=legend, **args)
        else:
            chart(data=tdf, axes=ax, cgrid=tc, **args)

    if legend:
        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]
        axl = fig.add_subplot(spec[1, numcols - 1])
        axl = components.legend(sorted(legend_color.items()), axl)
        axl.set_title(legend, loc='left')

    fig.suptitle('')
    return canvas.figure
def grid(rows=None, cols=None, data=None, chart=None, args=None,
         figsize=(8, 8), legend=None, cmap='default', colorbar=False):
    """
    Create a grid of charts from pandas data, one cell per combination of
    the unique values found in the ``rows`` and ``cols`` columns.

    NOTE(review): ``grid`` is also defined earlier in this file with the same
    signature; this later definition is the one that takes effect.

    :param rows: column name whose unique values become the grid rows
    :param cols: column name whose unique values become the grid columns
    :param data: pandas dataframe (required; ``data.copy()`` is called)
    :param chart: plotting function, called as
                  ``chart(data=..., axes=..., cgrid=..., **args)``
    :param args: argument dictionary to pass to the chart; None means no
                 extra arguments
    :param figsize: tuple to set figsize
    :param legend: color by this column
    :param cmap: matplotlib colormap to use
    :param colorbar: accepted but not used by this function
    :return: matplotlib figure
    """
    # args defaults to None; ** unpacking needs a mapping
    if args is None:
        args = {}

    df = data.copy()

    rows_array, cols_array = [], []
    # a missing/None column name simply leaves that direction un-faceted
    try:
        cols_array = sorted(set(df[cols]))
    except (KeyError, NameError, ValueError):
        pass
    try:
        rows_array = sorted(set(df[rows]))
    except (KeyError, NameError, ValueError):
        pass

    numcols = len(cols_array) if cols_array else 1
    numrows = len(rows_array) if rows_array else 1

    numcols += 1  # add a row and column for headings
    numrows += 1

    if legend:
        numcols += 1

    fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
    canvas = mbb.FigureCanvasAgg(fig)

    # narrow heading strips (ratio 1) next to wide chart cells (ratio 5)
    wrs = [1] + [5 for _ in cols_array] if cols_array else [1, 5]
    if legend:
        wrs += [1]
    hrs = [1] + [5 for _ in rows_array] if rows_array else [1, 5]

    spec = gs.GridSpec(numrows, numcols, width_ratios=wrs, height_ratios=hrs)

    # column headings along the top strip
    for i, val in enumerate(cols_array, start=1):
        ax = fig.add_subplot(spec[0, i])
        ax.text(.5, .3, val)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0), 1, 1, fill=True, transform=ax.transAxes,
                                  clip_on=True, fc='#C8C8C8')
        ax.add_patch(p)

    # row headings along the left strip
    for i, val in enumerate(rows_array, start=1):
        ax = fig.add_subplot(spec[i, 0])
        ax.text(.5, .5, val, rotation=90)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0), 1, 1, fill=True, transform=ax.transAxes,
                                  clip_on=True, fc='#C8C8C8')
        ax.add_patch(p)

    # if rows and columns are provided, we need all combinations
    # itertools product will return nothing if one of the cols/rows is None
    # so then we will default to the longest of the cols/rows
    charts = list(itertools.product(cols_array, rows_array))
    if not charts:
        try:
            zip_longest = itertools.zip_longest
        except AttributeError:  # py2
            zip_longest = itertools.izip_longest
        charts = list(zip_longest(cols_array, rows_array))

    if legend:
        cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

    for x, y in charts:
        # filter the data for the exact chart we are looking at.
        # zip_longest pads the missing direction with None, so test for None
        # explicitly: a truthiness test would skip real values like 0 or ''.
        mask = None
        if x is not None:
            mask = df[cols] == x
        if y is not None:
            row_mask = df[rows] == y
            mask = row_mask if mask is None else (mask & row_mask)

        tdf = df if mask is None else df[mask]
        if tdf.size == 0:
            continue

        # filter the color grid with the same mask so it matches the chart data
        tc = None
        if legend:
            tc = cgrid if mask is None else cgrid[mask]
            tc = tc.reset_index(drop=True)

        row_idx = rows_array.index(y) + 1 if y is not None else 1
        col_idx = cols_array.index(x) + 1 if x is not None else 1
        ax = fig.add_subplot(spec[row_idx, col_idx])

        # call the particular chart provided
        if legend:
            chart(data=tdf, axes=ax, cgrid=tc, legend=legend, **args)
        else:
            chart(data=tdf, axes=ax, cgrid=tc, **args)

    if legend:
        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]
        axl = fig.add_subplot(spec[1, numcols - 1])
        axl = components.legend(sorted(legend_color.items()), axl)
        axl.set_title(legend, loc='left')

    fig.suptitle('')
    return canvas.figure
def scatter(x, y, data=None, legend=None, marker='o', alpha=.5,
            xscale='linear', yscale='linear', cmap='default', figsize=(12, 6),
            fit=None, fitparams=None, table=True, fig=None, axes=None,
            cgrid=None, **kwargs):
    """
    Scatter plots with regression lines

    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pandas.Dataframe
    :param legend: str or ndarray, color/fit by this column
    :param marker: matplotlib marker style
    :param alpha: float, matplotlib alpha
    :param xscale: default == linear, any of matplotlib scale types
    :param yscale: default == linear, any of matplotlib scale types
    :param cmap: any of matplotlib cmaps
    :param figsize: default == (12, 6)
    :param fit: [linear, quadratic, smooth, interpolate]
    :param fitparams: params to pass to fitting function
    :param table: show the regression table (only drawn for linear/quadratic
                  fits and only when this function owns the figure)
    :param fig: matplotlib figure to reuse
    :param axes: matplotlib axes; when given the plot is drawn on it, the
                 legend and regression table are skipped, and the axes is
                 returned. Takes precedence over ``fig``.
    :param cgrid: optional precomputed color grid, looked up by row position
    :param kwargs: forwarded to ``Axes.scatter``
    :return: matplotlib figure, or the axes when ``axes`` is supplied
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    if not fitparams:
        fitparams = {}

    df = data.copy()
    df = df[[i for i in (x, y, legend) if i]]

    # many of the fitting routines don't work with nan or non-sorted data.
    # NaN rows are dropped here; sorting is applied to each frame right where
    # it is handed to the fitting routine, so df keeps its row order for the
    # positional cgrid lookup below.
    # NOTE(review): rows removed by dropna() renumber the remaining rows, which
    # can shift them relative to an externally supplied cgrid - confirm that
    # callers pass NaN-free data together with cgrid.
    df = df.dropna()
    df = df.reset_index()

    # fit axis is for the regression equations
    makefitaxis = fit in ('linear', 'quadratic')

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(False, legend, table and makefitaxis, fig=fig)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # if the axis is supplied, we do not want to create a legend axis
        if not axes:
            components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        text = ''
        for l in sorted(set(df[legend])):
            t = df[df[legend] == l]
            axm.scatter(x=t[x], y=t[y], c=legend_color[l], marker=marker,
                        alpha=alpha, **kwargs)
            if fit:
                xs, ys, fn = _get_fit(x, y, t.sort_values(x), fit, fitparams)
                axm.plot(xs, ys, c=legend_color[l])
                if makefitaxis and table:
                    text += '${}: {}$\n'.format(str(l).strip(), fn)

        if makefitaxis and table and not axes:
            components.regressiontable(text, axt, fig)
            axt.axis('off')
    else:
        axm.scatter(x=df[x], y=df[y], marker=marker, alpha=alpha, **kwargs)
        if fit:
            xs, ys, fn = _get_fit(x, y, df.sort_values(x), fit, fitparams)
            axm.plot(xs, ys)
            # the table axis only exists when we own the figure
            if makefitaxis and table and not axes:
                components.regressiontable('{}'.format(fn), axt, fig)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)
    axm.set_xlabel(x)
    axm.set_ylabel(y)

    if axes:
        return axm
    return canvas.figure