Beispiel #1
0
def boxplot(x, y, data=None, legend=None, marker='o',
            alpha=.5, points=True, cumprob=False, yscale='linear',
            cmap='default', figsize=(12, 6),  orderby=None, table=True,
            fig=None, axes=None, cgrid=None, violin=False, **kwargs):
    """
    Boxplot function
    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pd.Dataframe, source of data
    :param legend: str or ndarray color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param datatable: bool, show or not show datatable is available
    :param fig: matplotlib figure, if you want to re-use the figure, pass in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be created on that axes,
                    and other axes will not be created.
    :param kwargs:


    :return: matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    df[y] = df[y].astype('float').dropna()

    # TODO:  this doesn't really work right
    if orderby:
        temp = df.sort(x)
        t = temp.groupby(x)[orderby]
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            g = t.get_group(mg).reset_index()
            map_of_x[mg] = g[orderby][0]

        list_to_order = sorted([value for value in map_of_x.values()])

        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if fig and not axes:
        fig = fig
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob, legend, table, fig=fig)

    if violin:
        array = []
        for arr in sorted(set(df[x])):
            array.append(df[df[x] == arr][y])

        axm.violinplot(array, showmedians=True)

    else:
        if orderby:
            df.boxplot(column=y, by=x, ax=axm, showfliers=False,
                       positions=order, fontsize=8, **kwargs)
        else:
            df.boxplot(column=y, by=x, ax=axm, showfliers=False, fontsize=8, **kwargs)

    # We need to identify all of the unique entries in the groupby column
    unique_groups = set(df[x])
    nonan_grps = []
    for group in unique_groups:
        if 'nan' not in group:
            nonan_grps.append(group)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        legend_color = {}
        for i, key in df[legend].iteritems():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

    # add all the point level data
    groups = sorted(nonan_grps)
    for j, val in enumerate(groups):
        ys = df[y][df[x] == val]
        if orderby:
            pos = x_to_loc[val]
            xs = np.random.normal(pos, 0.05, size=len(ys))
        else:
            # create the jitters for the points
            xs = np.random.normal(j + 1, 0.05, size=len(ys))

        if points:

            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[df[x] == val]
                axm.scatter(xs, ys.values, color=cs, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)
            else:
                axm.scatter(xs, ys.values, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[df[x] == val]
                axc = components.cumprob(ys, axc, color=cs, alpha=alpha, swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formating
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)

        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
Beispiel #2
0
def cumprob(x, data=None, legend=None, figsize=(12, 6),
            xscale='linear', yscale='linear', cmap='default', alpha=0.5,
            marker='.', table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    :param x:  str or ndarray
    :param data: is x is a str, this is a pd.Dataframe
    :param legend: str or ndarray,
    :param figsize: default is 9,6; sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
    :param kwargs:  passed to matplotlib hist function
    :param fig: matplotlib figure if you want to reuse the figure.
    :return: matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('float').dropna()

    min_, max_ = np.min(df[x]), np.max(df[x])

    if fig:
        fig = fig
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)


        legend_color = {}
        for i, key in df[legend].iteritems():
            legend_color[key] = cgrid[i]

        if not axes:
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend,loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x],
                                       axm,
                                       color=legend_color[group],
                                       marker=marker,
                                       alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formating
    for label in axm.get_xticklabels():
        label.set_rotation(90)
    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
Beispiel #3
0
def cumprob(x,
            data=None,
            legend=None,
            figsize=(12, 6),
            xscale='linear',
            yscale='linear',
            cmap='default',
            alpha=0.5,
            marker='.',
            table=True,
            fig=None,
            axes=None,
            cgrid=None,
            **kwargs):
    """
    :param x:  str or ndarray
    :param data: is x is a str, this is a pd.Dataframe
    :param legend: str or ndarray,
    :param figsize: default is 9,6; sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
    :param kwargs:  passed to matplotlib hist function
    :param fig: matplotlib figure if you want to reuse the figure.
    :return: matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('float').dropna()

    min_, max_ = np.min(df[x]), np.max(df[x])

    if fig:
        fig = fig
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None,
                                                    legend,
                                                    table,
                                                    fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend],
                                           kind='discrete',
                                           cmap=cmap)

        legend_color = {}
        for i, key in df[legend].iteritems():
            legend_color[key] = cgrid[i]

        if not axes:
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x],
                                     axm,
                                     color=legend_color[group],
                                     marker=marker,
                                     alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formating
    for label in axm.get_xticklabels():
        label.set_rotation(90)
    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
Beispiel #4
0
def boxplot(x,
            y,
            data=None,
            legend=None,
            marker='o',
            alpha=.5,
            points=True,
            cumprob=False,
            yscale='linear',
            cmap='default',
            figsize=(12, 6),
            orderby=None,
            table=True,
            fig=None,
            axes=None,
            cgrid=None,
            violin=False,
            **kwargs):
    """
    Boxplot function
    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pd.Dataframe, source of data
    :param legend: str or ndarray color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param datatable: bool, show or not show datatable is available
    :param fig: matplotlib figure, if you want to re-use the figure, pass in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be created on that axes,
                    and other axes will not be created.
    :param kwargs:


    :return: matplotlib figure
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    df[y] = df[y].astype('float').dropna()

    # TODO:  this doesn't really work right
    if orderby:
        temp = df.sort(x)
        t = temp.groupby(x)[orderby]
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            g = t.get_group(mg).reset_index()
            map_of_x[mg] = g[orderby][0]

        list_to_order = sorted([value for value in map_of_x.values()])

        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if fig and not axes:
        fig = fig
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob,
                                                    legend,
                                                    table,
                                                    fig=fig)

    if violin:
        array = []
        for arr in sorted(set(df[x])):
            array.append(df[df[x] == arr][y])

        axm.violinplot(array, showmedians=True)

    else:
        if orderby:
            df.boxplot(column=y,
                       by=x,
                       ax=axm,
                       showfliers=False,
                       positions=order,
                       fontsize=8,
                       **kwargs)
        else:
            df.boxplot(column=y,
                       by=x,
                       ax=axm,
                       showfliers=False,
                       fontsize=8,
                       **kwargs)

    # We need to identify all of the unique entries in the groupby column
    unique_groups = set(df[x])
    nonan_grps = []
    for group in unique_groups:
        if 'nan' not in group:
            nonan_grps.append(group)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend],
                                           kind='discrete',
                                           cmap=cmap)

        legend_color = {}
        for i, key in df[legend].iteritems():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

    # add all the point level data
    groups = sorted(nonan_grps)
    for j, val in enumerate(groups):
        ys = df[y][df[x] == val]
        if orderby:
            pos = x_to_loc[val]
            xs = np.random.normal(pos, 0.05, size=len(ys))
        else:
            # create the jitters for the points
            xs = np.random.normal(j + 1, 0.05, size=len(ys))

        if points:

            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[df[x] == val]
                axm.scatter(xs,
                            ys.values,
                            color=cs,
                            marker=marker,
                            alpha=alpha,
                            linewidths=1,
                            **kwargs)
            else:
                axm.scatter(xs,
                            ys.values,
                            marker=marker,
                            alpha=alpha,
                            linewidths=1,
                            **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[df[x] == val]
                axc = components.cumprob(ys,
                                         axc,
                                         color=cs,
                                         alpha=alpha,
                                         swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formating
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)

        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
Beispiel #5
0
def scatter(x, y, data=None, legend=None,  marker='o', alpha=.5,
            xscale='linear', yscale='linear', cmap='default', figsize=(12, 6),
            fit=None, fitparams=None, table=True, fig=None, axes=None, cgrid=None,
            **kwargs):
    """
    Scatter plots with regression lines
    :param x:  str or ndarray
    :param y: str or ndarray
    :param data: pandas.Dataframe
    :param legend: str or ndarray, color/fit by this column
    :param marker: matplotlib marker style
    :param alpha: float, matplotlib alpha
    :param xscale: default == linear, any of matplotlib scale types
    :param yscale: default == linear, any of matplotlib scale types
    :param cmap: any of matplotlib cmaps
    :param figsize: default == (9,6);
    :param fit: [linear, quadratic, smooth, interpolate]
    :param fitparams: params to pass to fitting function
    :param table:  show the regression table
    :param kwargs:
    :return: fig, (axes)
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    if not fitparams:
        fitparams = {}

    df = data.copy()
    df = df[[i for i in (x, y, legend) if i]]
    # many of the fitting routines don't work with nan or non-sorted data.
    df = df.dropna()
    df.sort_values(x)
    df = df.reset_index()

    # fit axis is for the regression equations
    makefitaxis = False
    if fit == 'linear' or fit == 'quadratic':
        makefitaxis = True

    if fig:
        fig = fig
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(False, legend, table and makefitaxis, fig=fig)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend],
                                           kind='discrete', cmap=cmap)

        legend_color = {}
        for i, key in df[legend].iteritems():
            legend_color[key] = cgrid[i]

        # if the axis is supplied, we do not want to create a legend axis
        if not axes:
            components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        text = ''
        for l in sorted(set(df[legend])):
            t = df[df[legend] == l]
            axm.scatter(x=t[x], y=t[y], c=legend_color[l],
                        marker=marker, alpha=alpha, **kwargs)

            if fit:
                xs, ys, fn = _get_fit(x, y, t, fit, fitparams)
                axm.plot(xs, ys, c=legend_color[l])

                if makefitaxis and table:
                    text += '${}:  {}$\n'.format(str(l).strip(), fn)

        if makefitaxis and table and not axes:
            components.regressiontable(text, axt, fig)
            axt.axis('off')

    else:
        axm.scatter(x=df[x], y=df[y],
                    marker=marker, alpha=alpha, **kwargs)
        if fit:
            xs, ys, fn = _get_fit(x, y, df, fit, fitparams)
            axm.plot(xs, ys)

            if makefitaxis and table:
                components.regressiontable('{}'.format(fn), axt, fig)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)
    axm.set_xlabel(x)
    axm.set_ylabel(y)

    if axes:
        return axm

    return canvas.figure
Beispiel #6
0
def contour(x,
            y,
            z,
            data=None,
            marker=None,
            alpha=.5,
            xscale='linear',
            yscale='linear',
            cmap=None,
            ncontours=100,
            gridsize=100,
            colorbar=True,
            labels=False,
            figsize=(12, 6),
            filled=True,
            fig=None,
            axes=None,
            cgrid=None,
            axislabels=True,
            axisticks=True,
            **kwargs):
    """
    Create a contour plot from x, y, ans z values
    """

    # if no dataframe is supplied, create one
    if data is None:
        (x, y, z, _), data = components.create_df(x, y, z, _)

    df = data.copy()
    df = df[[i for i in (x, y, z) if i]]
    df = df.dropna()
    df = df.reset_index()

    if fig and not axes:
        fig = fig
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes_1(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, _, _, _ = components.create_axes(False, False, False, fig=fig)

    xi = np.linspace(np.min(df[x]), np.max(df[x]), gridsize)
    yi = np.linspace(np.min(df[y]), np.max(df[y]), gridsize)
    try:
        zi = mpl.mlab.griddata(df[x], df[y], df[z], xi, yi, interp='linear')
    except ValueError:
        return

    if filled:
        cf = axm.contourf(xi, yi, zi, ncontours, cmap=cmap, **kwargs)
    else:
        cf = axm.contour(xi, yi, zi, ncontours, cmap=cmap, **kwargs)

    if not axisticks:
        axm.get_xaxis().set_visible(False)
        axm.get_yaxis().set_visible(False)

    if marker:
        axm.scatter(df[x], df[y], marker=marker, color='k')

    if colorbar and not axes:
        fig.colorbar(cf)

    if labels:
        axm.clabel(cf)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)

    if axislabels:
        axm.set_xlabel(x)
        axm.set_ylabel(y)

    if axes:
        return axm
    else:
        return canvas.figure