Example #1
0
def cumprob(x, data=None, legend=None, figsize=(12, 6),
            xscale='linear', yscale='linear', cmap='default', alpha=0.5,
            marker='.', table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Cumulative probability plot of a single column, optionally split by group.

    :param x:  str or ndarray
    :param data: if x is a str, this is the pd.DataFrame that holds the column
    :param legend: str or ndarray, one curve (and color) is drawn per unique value
    :param figsize: default is (12, 6); sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
    :param fig: matplotlib figure if you want to reuse the figure.
    :param axes: matplotlib axes to draw on; when given it takes precedence
                 over fig and no legend or table is drawn
    :param cgrid: color grid to reuse; it is indexed with the row labels of the
                  re-indexed data and is built from the legend column when None
    :param kwargs: accepted but not used by this function
    :return: the axes if ``axes`` was supplied, otherwise the matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # No dropna() here: assigning back into the frame re-aligns on the index,
    # which would restore the dropped rows as NaN anyway.
    df[x] = df[x].astype('float')

    min_, max_ = np.min(df[x]), np.max(df[x])

    # A supplied axes wins over a supplied figure (same rule as boxplot),
    # otherwise the caller's axes would be silently ignored.
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # one color per legend value; Series.items() replaces the removed
        # Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x],
                                     axm,
                                     color=legend_color[group],
                                     marker=marker,
                                     alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)
    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
Example #2
0
def boxplot(x, y, data=None, legend=None, marker='o',
            alpha=.5, points=True, cumprob=False, yscale='linear',
            cmap='default', figsize=(12, 6),  orderby=None, table=True,
            fig=None, axes=None, cgrid=None, violin=False, **kwargs):
    """
    Boxplot (or violin plot) of y grouped by x, with optional jittered points.

    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pd.Dataframe, source of data
    :param legend: str or ndarray color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param table: bool, show or not show the datatable
    :param fig: matplotlib figure, if you want to re-use the figure, pass in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be created on that axes,
                    and other axes will not be created.
    :param cgrid: color grid to reuse (one entry per row of data); built from
                  the legend column when None and a legend is requested
    :param violin: bool, draw a violin plot instead of the pandas boxplot
    :param kwargs: forwarded to both ``DataFrame.boxplot`` and ``Axes.scatter``

    :return: the axes if ``axes`` was supplied, otherwise the matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    # group labels are handled as strings from here on
    df[x] = df[x].astype('str')
    # No dropna() here: assigning back into the frame re-aligns on the index,
    # which would restore the dropped rows as NaN anyway.
    df[y] = df[y].astype('float')

    # TODO:  this doesn't really work right
    if orderby:
        # DataFrame.sort() was removed from pandas; sort_values() replaces it
        temp = df.sort_values(x)
        t = temp.groupby(x)[orderby]
        # first orderby value seen in each group decides the group's position
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            g = t.get_group(mg).reset_index()
            map_of_x[mg] = g[orderby][0]

        list_to_order = sorted(map_of_x.values())

        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob, legend, table, fig=fig)

    # every unique label of the groupby column, in drawing order
    all_groups = sorted(set(df[x]))

    if violin:
        array = [df[df[x] == arr][y] for arr in all_groups]
        axm.violinplot(array, showmedians=True)
    elif orderby:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False,
                   positions=order, fontsize=8, **kwargs)
    else:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False, fontsize=8, **kwargs)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.items() replaces the removed Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

    # Add all the point level data. Positions are enumerated over every group
    # so they stay lined up with the boxes/violins drawn above; only the exact
    # placeholder label 'nan' is skipped (a substring test would also drop
    # labels such as 'banana').
    for j, val in enumerate(all_groups):
        if val == 'nan':
            continue

        ys = df[y][df[x] == val]
        if orderby:
            pos = x_to_loc[val]
            xs = np.random.normal(pos, 0.05, size=len(ys))
        else:
            # create the jitters for the points
            xs = np.random.normal(j + 1, 0.05, size=len(ys))

        if points:

            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[df[x] == val]
                axm.scatter(xs, ys.values, color=cs, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)
            else:
                axm.scatter(xs, ys.values, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[df[x] == val]
                axc = components.cumprob(ys, axc, color=cs, alpha=alpha, swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formatting
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)

        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
Example #3
0
def cumprob(x,
            data=None,
            legend=None,
            figsize=(12, 6),
            xscale='linear',
            yscale='linear',
            cmap='default',
            alpha=0.5,
            marker='.',
            table=True,
            fig=None,
            axes=None,
            cgrid=None,
            **kwargs):
    """
    Cumulative probability plot of a single column, optionally split by group.

    :param x:  str or ndarray
    :param data: if x is a str, this is the pd.DataFrame that holds the column
    :param legend: str or ndarray, one curve (and color) is drawn per unique value
    :param figsize: default is (12, 6); sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
    :param fig: matplotlib figure if you want to reuse the figure.
    :param axes: matplotlib axes to draw on; when given it takes precedence
                 over fig and no legend or table is drawn
    :param cgrid: color grid to reuse; it is indexed with the row labels of the
                  re-indexed data and is built from the legend column when None
    :param kwargs: accepted but not used by this function
    :return: the axes if ``axes`` was supplied, otherwise the matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # No dropna() here: assigning back into the frame re-aligns on the index,
    # which would restore the dropped rows as NaN anyway.
    df[x] = df[x].astype('float')

    min_, max_ = np.min(df[x]), np.max(df[x])

    # A supplied axes wins over a supplied figure (same rule as boxplot),
    # otherwise the caller's axes would be silently ignored.
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None,
                                                    legend,
                                                    table,
                                                    fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend],
                                           kind='discrete',
                                           cmap=cmap)

        # one color per legend value; Series.items() replaces the removed
        # Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x],
                                     axm,
                                     color=legend_color[group],
                                     marker=marker,
                                     alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)
    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
Example #4
0
def boxplot(x,
            y,
            data=None,
            legend=None,
            marker='o',
            alpha=.5,
            points=True,
            cumprob=False,
            yscale='linear',
            cmap='default',
            figsize=(12, 6),
            orderby=None,
            table=True,
            fig=None,
            axes=None,
            cgrid=None,
            violin=False,
            **kwargs):
    """
    Boxplot (or violin plot) of y grouped by x, with optional jittered points.

    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pd.Dataframe, source of data
    :param legend: str or ndarray color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param table: bool, show or not show the datatable
    :param fig: matplotlib figure, if you want to re-use the figure, pass in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be created on that axes,
                    and other axes will not be created.
    :param cgrid: color grid to reuse (one entry per row of data); built from
                  the legend column when None and a legend is requested
    :param violin: bool, draw a violin plot instead of the pandas boxplot
    :param kwargs: forwarded to both ``DataFrame.boxplot`` and ``Axes.scatter``

    :return: the axes if ``axes`` was supplied, otherwise the matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    # group labels are handled as strings from here on
    df[x] = df[x].astype('str')
    # No dropna() here: assigning back into the frame re-aligns on the index,
    # which would restore the dropped rows as NaN anyway.
    df[y] = df[y].astype('float')

    # TODO:  this doesn't really work right
    if orderby:
        # DataFrame.sort() was removed from pandas; sort_values() replaces it
        temp = df.sort_values(x)
        t = temp.groupby(x)[orderby]
        # first orderby value seen in each group decides the group's position
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            g = t.get_group(mg).reset_index()
            map_of_x[mg] = g[orderby][0]

        list_to_order = sorted(map_of_x.values())

        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob,
                                                    legend,
                                                    table,
                                                    fig=fig)

    # every unique label of the groupby column, in drawing order
    all_groups = sorted(set(df[x]))

    if violin:
        array = [df[df[x] == arr][y] for arr in all_groups]
        axm.violinplot(array, showmedians=True)
    elif orderby:
        df.boxplot(column=y,
                   by=x,
                   ax=axm,
                   showfliers=False,
                   positions=order,
                   fontsize=8,
                   **kwargs)
    else:
        df.boxplot(column=y,
                   by=x,
                   ax=axm,
                   showfliers=False,
                   fontsize=8,
                   **kwargs)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend],
                                           kind='discrete',
                                           cmap=cmap)

        # Series.items() replaces the removed Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

    # Add all the point level data. Positions are enumerated over every group
    # so they stay lined up with the boxes/violins drawn above; only the exact
    # placeholder label 'nan' is skipped (a substring test would also drop
    # labels such as 'banana').
    for j, val in enumerate(all_groups):
        if val == 'nan':
            continue

        ys = df[y][df[x] == val]
        if orderby:
            pos = x_to_loc[val]
            xs = np.random.normal(pos, 0.05, size=len(ys))
        else:
            # create the jitters for the points
            xs = np.random.normal(j + 1, 0.05, size=len(ys))

        if points:

            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[df[x] == val]
                axm.scatter(xs,
                            ys.values,
                            color=cs,
                            marker=marker,
                            alpha=alpha,
                            linewidths=1,
                            **kwargs)
            else:
                axm.scatter(xs,
                            ys.values,
                            marker=marker,
                            alpha=alpha,
                            linewidths=1,
                            **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[df[x] == val]
                axc = components.cumprob(ys,
                                         axc,
                                         color=cs,
                                         alpha=alpha,
                                         swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formatting
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)

        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
Example #5
0
def grid(rows=None,
         cols=None,
         data=None,
         chart=None,
         args=None,
         figsize=(8, 8),
         legend=None,
         cmap='default',
         colorbar=False):
    """ Create a grid of charts from pandas data, one cell per row/column value

    :param rows:  name of the column whose unique values define the grid rows
    :param cols:  name of the column whose unique values define the grid columns
    :param data:  pandas dataframe (required; it is copied, not modified)
    :param chart:  plotting function, called for every cell as
                   ``chart(data=..., axes=..., cgrid=..., **args)``
    :param args:  argument dictionary to pass to the chart,
                  None means no extra arguments
    :param figsize: tuple to set figsize
    :param legend: color by this column
    :param cmap: matplotlib colormap to use
    :param colorbar: currently unused
    :return: matplotlib figure
    """

    # args defaults to None, which cannot be unpacked with ** further down
    chart_args = {} if args is None else args

    df = data.copy()
    rows_array, cols_array = [], []
    # a missing/unknown column name simply leaves that dimension empty
    try:
        cols_array = sorted(set(df[cols]))
    except (KeyError, NameError, ValueError):
        pass

    try:
        rows_array = sorted(set(df[rows]))
    except (KeyError, NameError, ValueError):
        pass

    numcols = len(cols_array) if cols_array else 1
    numrows = len(rows_array) if rows_array else 1

    numcols += 1  # add a row and column for headings
    numrows += 1

    if legend:
        numcols += 1

    fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
    canvas = mbb.FigureCanvasAgg(fig)

    # heading strips get weight 1, chart cells weight 5
    wrs = [1] + [5] * len(cols_array) if cols_array else [1, 5]
    if legend:
        wrs += [1]
    hrs = [1] + [5] * len(rows_array) if rows_array else [1, 5]

    spec = gs.GridSpec(numrows, numcols, width_ratios=wrs, height_ratios=hrs)

    # column headings live in grid row 0, starting at grid column 1
    for i, val in enumerate(cols_array, start=1):
        ax = fig.add_subplot(spec[0, i])
        ax.text(.5, .3, val)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0),
                                  1,
                                  1,
                                  fill=True,
                                  transform=ax.transAxes,
                                  clip_on=True,
                                  fc='#C8C8C8')
        ax.add_patch(p)

    # row headings live in grid column 0, starting at grid row 1
    for i, val in enumerate(rows_array, start=1):
        ax = fig.add_subplot(spec[i, 0])
        ax.text(.5, .5, val, rotation=90)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0),
                                  1,
                                  1,
                                  fill=True,
                                  transform=ax.transAxes,
                                  clip_on=True,
                                  fc='#C8C8C8')
        ax.add_patch(p)

    # if rows and columns are provided, we need all combinations
    # itertools product will return nothing if one of the cols/rows is None
    # so then we will default to the longest of the cols/rows
    # NOTE(review): when neither dimension resolves, charts stays empty and
    # no chart cell is drawn at all - confirm that this is intended.
    charts = list(itertools.product(cols_array, rows_array))
    if not charts:
        try:
            charts = list(itertools.zip_longest(cols_array, rows_array))
        except AttributeError:  # py2
            charts = list(itertools.izip_longest(cols_array, rows_array))

    if legend:
        cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

    for x, y in charts:
        # Filter the data for the exact chart we are looking at. None marks a
        # missing dimension (zip_longest fill value); falsy values such as 0
        # or '' are real cell values and must still be filtered on.
        mask = None
        if x is not None:
            mask = df[cols] == x
        if y is not None:
            row_mask = df[rows] == y
            mask = row_mask if mask is None else (mask & row_mask)

        tdf = df if mask is None else df[mask]

        if tdf.size == 0:
            continue

        # filter the color grid to match the chart data
        tc = None
        if legend:
            tc = cgrid if mask is None else cgrid[mask]
            tc = tc.reset_index(drop=True)

        row_idx = rows_array.index(y) + 1 if y is not None else 1
        col_idx = cols_array.index(x) + 1 if x is not None else 1
        ax = fig.add_subplot(spec[row_idx, col_idx])

        # call the particular chart provided
        if legend:
            chart(data=tdf, axes=ax, cgrid=tc, legend=legend, **chart_args)
        else:
            chart(data=tdf, axes=ax, cgrid=tc, **chart_args)

    if legend:
        # Series.items() replaces the removed Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        axl = fig.add_subplot(spec[1, numcols - 1])
        axl = components.legend(sorted(legend_color.items()), axl)
        axl.set_title(legend, loc='left')

    fig.suptitle('')
    return canvas.figure
Example #6
0
def grid(rows=None, cols=None, data=None, chart=None, args=None, figsize=(8, 8), legend=None,
         cmap='default', colorbar=False):
    """ Create a grid of charts from pandas data, one cell per row/column value

    :param rows:  name of the column whose unique values define the grid rows
    :param cols:  name of the column whose unique values define the grid columns
    :param data:  pandas dataframe (required; it is copied, not modified)
    :param chart:  plotting function, called for every cell as
                   ``chart(data=..., axes=..., cgrid=..., **args)``
    :param args:  argument dictionary to pass to the chart,
                  None means no extra arguments
    :param figsize: tuple to set figsize
    :param legend: color by this column
    :param cmap: matplotlib colormap to use
    :param colorbar: currently unused
    :return: matplotlib figure
    """

    # args defaults to None, which cannot be unpacked with ** further down
    chart_args = {} if args is None else args

    df = data.copy()
    rows_array, cols_array = [], []
    # a missing/unknown column name simply leaves that dimension empty
    try:
        cols_array = sorted(set(df[cols]))
    except (KeyError, NameError, ValueError):
        pass

    try:
        rows_array = sorted(set(df[rows]))
    except (KeyError, NameError, ValueError):
        pass

    numcols = len(cols_array) if cols_array else 1
    numrows = len(rows_array) if rows_array else 1

    numcols += 1  # add a row and column for headings
    numrows += 1

    if legend:
        numcols += 1

    fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
    canvas = mbb.FigureCanvasAgg(fig)

    # heading strips get weight 1, chart cells weight 5
    wrs = [1] + [5] * len(cols_array) if cols_array else [1, 5]
    if legend:
        wrs += [1]
    hrs = [1] + [5] * len(rows_array) if rows_array else [1, 5]

    spec = gs.GridSpec(numrows, numcols, width_ratios=wrs,
                       height_ratios=hrs)

    # column headings live in grid row 0, starting at grid column 1
    for i, val in enumerate(cols_array, start=1):
        ax = fig.add_subplot(spec[0, i])
        ax.text(.5, .3, val)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0), 1, 1,
                                  fill=True, transform=ax.transAxes, clip_on=True,
                                  fc='#C8C8C8')
        ax.add_patch(p)

    # row headings live in grid column 0, starting at grid row 1
    for i, val in enumerate(rows_array, start=1):
        ax = fig.add_subplot(spec[i, 0])
        ax.text(.5, .5, val, rotation=90)
        ax.axis('off')
        p = mpl.patches.Rectangle((0, 0), 1, 1,
                                  fill=True, transform=ax.transAxes, clip_on=True,
                                  fc='#C8C8C8')
        ax.add_patch(p)

    # if rows and columns are provided, we need all combinations
    # itertools product will return nothing if one of the cols/rows is None
    # so then we will default to the longest of the cols/rows
    # NOTE(review): when neither dimension resolves, charts stays empty and
    # no chart cell is drawn at all - confirm that this is intended.
    charts = list(itertools.product(cols_array, rows_array))
    if not charts:
        try:
            charts = list(itertools.zip_longest(cols_array, rows_array))
        except AttributeError:  # py2
            charts = list(itertools.izip_longest(cols_array, rows_array))

    if legend:
        cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

    for x, y in charts:
        # Filter the data for the exact chart we are looking at. None marks a
        # missing dimension (zip_longest fill value); falsy values such as 0
        # or '' are real cell values and must still be filtered on.
        mask = None
        if x is not None:
            mask = df[cols] == x
        if y is not None:
            row_mask = df[rows] == y
            mask = row_mask if mask is None else (mask & row_mask)

        tdf = df if mask is None else df[mask]

        if tdf.size == 0:
            continue

        # filter the color grid to match the chart data
        tc = None
        if legend:
            tc = cgrid if mask is None else cgrid[mask]
            tc = tc.reset_index(drop=True)

        row_idx = rows_array.index(y) + 1 if y is not None else 1
        col_idx = cols_array.index(x) + 1 if x is not None else 1
        ax = fig.add_subplot(spec[row_idx, col_idx])

        # call the particular chart provided
        if legend:
            chart(data=tdf, axes=ax, cgrid=tc, legend=legend, **chart_args)
        else:
            chart(data=tdf, axes=ax, cgrid=tc, **chart_args)

    if legend:
        # Series.items() replaces the removed Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        axl = fig.add_subplot(spec[1, numcols - 1])
        axl = components.legend(sorted(legend_color.items()), axl)
        axl.set_title(legend, loc='left')

    fig.suptitle('')
    return canvas.figure
Example #7
0
def scatter(x, y, data=None, legend=None,  marker='o', alpha=.5,
            xscale='linear', yscale='linear', cmap='default', figsize=(12, 6),
            fit=None, fitparams=None, table=True, fig=None, axes=None, cgrid=None,
            **kwargs):
    """
    Scatter plots with regression lines
    :param x:  str or ndarray
    :param y: str or ndarray
    :param data: pandas.Dataframe
    :param legend: str or ndarray, color/fit by this column
    :param marker: matplotlib marker style
    :param alpha: float, matplotlib alpha
    :param xscale: default == linear, any of matplotlib scale types
    :param yscale: default == linear, any of matplotlib scale types
    :param cmap: any of matplotlib cmaps
    :param figsize: default == (12, 6);
    :param fit: [linear, quadratic, smooth, interpolate]
    :param fitparams: params to pass to fitting function
    :param table:  show the regression table
    :param fig: matplotlib figure to reuse
    :param axes: matplotlib axes to draw on; when given it takes precedence
                 over fig and no legend or regression table is drawn
    :param cgrid: color grid to reuse; it is indexed with the row labels of the
                  re-indexed data and is built from the legend column when None
    :param kwargs: forwarded to ``Axes.scatter``
    :return: the axes if ``axes`` was supplied, otherwise the matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    if not fitparams:
        fitparams = {}

    df = data.copy()
    df = df[[i for i in (x, y, legend) if i]]
    # many of the fitting routines don't work with nan or non-sorted data.
    # NaNs are dropped here; the sorting is applied only to the data handed
    # to the fit (see below), because re-ordering df itself would break the
    # positional lookup into a caller-supplied cgrid.
    df = df.dropna()
    df = df.reset_index()

    # fit axis is for the regression equations
    makefitaxis = fit in ('linear', 'quadratic')

    # A supplied axes wins over a supplied figure (same rule as boxplot),
    # otherwise the caller's axes would be silently ignored.
    if axes:
        axm = axes
    elif fig:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(False, legend, table and makefitaxis, fig=fig)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend],
                                           kind='discrete', cmap=cmap)

        # one color per legend value; Series.items() replaces the removed
        # Series.iteritems()
        legend_color = {}
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        # if the axis is supplied, we do not want to create a legend axis
        if not axes:
            components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        text = ''
        for group in sorted(set(df[legend])):
            t = df[df[legend] == group]
            axm.scatter(x=t[x], y=t[y], c=legend_color[group],
                        marker=marker, alpha=alpha, **kwargs)

            if fit:
                fit_data = t.sort_values(x).reset_index(drop=True)
                xs, ys, fn = _get_fit(x, y, fit_data, fit, fitparams)
                axm.plot(xs, ys, c=legend_color[group])

                if makefitaxis and table:
                    text += '${}:  {}$\n'.format(str(group).strip(), fn)

        if makefitaxis and table and not axes:
            components.regressiontable(text, axt, fig)
            axt.axis('off')

    else:
        axm.scatter(x=df[x], y=df[y],
                    marker=marker, alpha=alpha, **kwargs)
        if fit:
            fit_data = df.sort_values(x).reset_index(drop=True)
            xs, ys, fn = _get_fit(x, y, fit_data, fit, fitparams)
            axm.plot(xs, ys)

            # there is no table axes (axt) when the caller supplied axes
            if makefitaxis and table and not axes:
                components.regressiontable('{}'.format(fn), axt, fig)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)
    axm.set_xlabel(x)
    axm.set_ylabel(y)

    if axes:
        return axm

    return canvas.figure