Beispiel #1
0
def main():
    """Histogram one or more columns of the input dataframe.

    The dataframe is read with ``io_lib.df_from_input``.  When ``args.quiet``
    is set, the histogram of the first selected column is computed with
    numpy and written out through ``io_lib.df_to_output`` (bin centers and
    counts) instead of being plotted.  Otherwise every selected column is
    drawn with ``DataFrame.hist`` and the figure is shown.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # fall back to the first column when none were requested
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(
            df[cols[0]], bins=nbins, range=range_tup, density=do_density)
        # report each bin by its midpoint rather than its edges
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # ``normed`` is no longer accepted by matplotlib's hist(); ``density``
        # is the supported keyword and is forwarded by DataFrame.hist.
        df.hist(cols, bins=nbins, range=range_tup,
                alpha=alpha, sharex=sharex, sharey=sharey, layout=layout_tup,
                density=do_density)

        plot_lib.refine_plot(args)
        plot_lib.show(args)
Beispiel #2
0
    def test_show_creates_png_file(self):
        """show() writes a png file at the path given in savefig
        """
        png_path = os.path.join(self.dir_name, 'plot.png')

        plot_lib.show(MagicMock(savefig=[png_path]))

        self.assertTrue(os.path.isfile(png_path))
Beispiel #3
0
    def test_show_creates_png_file(self):
        """show() leaves a png file on disk at the savefig path
        """
        target = os.path.join(self.dir_name, "plot.png")
        mock_args = MagicMock(savefig=[target])

        plot_lib.show(mock_args)

        created = os.path.isfile(target)
        self.assertTrue(created)
Beispiel #4
0
    def test_show_creates_html_file(self):
        """show() saves an html file that contains the plot's x label
        """
        file_name = os.path.join(self.dir_name, 'plot.html')
        args = MagicMock(savefig=[file_name])

        xlabel = 'my_xlabel_string'
        pl.xlabel(xlabel)
        plot_lib.show(args)
        with open(file_name) as f:
            # assertIn reports the searched-for text when the check fails
            self.assertIn(xlabel, f.read())
Beispiel #5
0
    def test_show_creates_html_file(self):
        """show() saves an html file that contains the plot's x label
        """
        file_name = os.path.join(self.dir_name, "plot.html")
        args = MagicMock(savefig=[file_name])

        xlabel = "my_xlabel_string"
        pl.xlabel(xlabel)
        plot_lib.show(args)
        with open(file_name) as f:
            # assertIn reports the searched-for text when the check fails
            self.assertIn(xlabel, f.read())
Beispiel #6
0
def main():
    """Histogram one or more columns of the input dataframe.

    The dataframe is read with ``io_lib.df_from_input``.  When ``args.quiet``
    is set, the histogram of the first selected column is computed with
    numpy and written out through ``io_lib.df_to_output`` (bin centers and
    counts) instead of being plotted.  Otherwise every selected column is
    drawn with ``DataFrame.hist`` and the figure is shown.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # fall back to the first column when none were requested
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(df[cols[0]],
                                     bins=nbins,
                                     range=range_tup,
                                     density=do_density)
        # report each bin by its midpoint rather than its edges
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # ``normed`` is no longer accepted by matplotlib's hist(); ``density``
        # is the supported keyword and is forwarded by DataFrame.hist.
        df.hist(cols,
                bins=nbins,
                range=range_tup,
                alpha=alpha,
                sharex=sharex,
                sharey=sharey,
                layout=layout_tup,
                density=do_density)

        plot_lib.refine_plot(args)
        plot_lib.show(args)
Beispiel #7
0
def main():
    """Command-line entry point for formula-based linear regression.

    The model given with ``--model`` is fit with ``sm.ols`` against the
    input dataframe.  With ``--fit`` the input is written back out with
    ``fit_`` and ``resid_`` columns appended and nothing else happens.
    Otherwise the fit summary is written to stdout and, with ``--plot``,
    residual plots are shown.
    """
    description = textwrap.dedent(
        """
        Performs (multivariable) linear regression.  The fitting model
        is specified using the R-like, patsy syntax.  Input is from stdin
        and output is either fitting information or the input data
        with columns added for the fit and residuals.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to the sea-level data
                p.example_data -d sealevel | p.regress -m 'sealevel_mm ~ year'

            * Fit a trend plus annual cycle to sealevel data
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin'

            * Examine residual ECDF of trend plus annual fit
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin' --fit \\
                | p.cdf -c 'resid_' --title 'ECDF of trend + annual'

            * Detrend sealevel data to more clearly reveal oscillations
                p.example_data -d sealevel \\
                | p.regress -m 'sealevel_mm ~ year' --fit \\
                | p.plot -x year -y resid_ --ylabel 'Trend removed (mm)' \\
                         --title 'Global Sea Surface Height'

            * Set origin of sealevel data to 0 and regress with no intercept
                p.example_data -d sealevel\\
                | p.df 'df["year"] = df.year - df.year.iloc[0]'\\
                'df["sealevel_mm"] = df.sealevel_mm - df.sealevel_mm.iloc[0]'\\
                | p.regress -m 'sealevel_mm ~ year - 1' --fit\\
                | p.plot -x year -y sealevel_mm fit_ --style '.' '-'\\
                     --alpha .2 1 --legend best --title 'Force Zero Intercept'

        -----------------------------------------------------------------------
        """
    )

    # build the command line parser
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    # the regression model formula
    parser.add_argument(
        "-m", "--model", type=str, nargs=1, required=True,
        help="The model expressed in patsy syntax")

    # output mode switches
    parser.add_argument(
        "--fit", action="store_true", dest='retfit', default=False,
        help="Return input with fit and residual appended")
    parser.add_argument(
        "--plot", action="store_true", default=False,
        help="Make residual plots")

    args = parser.parse_args()
    formula = args.model[0]

    # load the data and run the regression
    df = io_lib.df_from_input(args)
    ols_fit = sm.ols(formula=formula, data=df).fit()
    df['fit_'] = ols_fit.fittedvalues
    df['resid_'] = ols_fit.resid

    # --fit: emit the augmented dataframe and stop
    if args.retfit:
        io_lib.df_to_output(args, df)
        return

    # default: report the fit summary on stdout
    sys.stdout.write('\n{}\n'.format(ols_fit.summary()))
    sys.stdout.flush()

    if not args.plot:
        return

    # top panel: residual against fitted value
    pl.subplot(211)
    pl.plot(df.fit_, df.resid_, '.', alpha=.5)
    pl.xlabel('Fit')
    pl.ylabel('Residual')
    pl.title(formula)

    # bottom panel: distribution of the residuals
    pl.subplot(212)
    sns.distplot(df.resid_, bins=50)
    pl.xlabel('Residual with R^2 = {:0.4f}'.format(ols_fit.rsquared))
    pl.ylabel('Counts')

    # these backends need the layout flag set instead of a direct call
    backend = mpl.get_backend().lower()
    if backend in ['agg', 'macosx']:
        pl.gcf().set_tight_layout(True)
    else:
        pl.gcf().tight_layout()

    plot_lib.show(args)
def main():
    """Command-line entry point that draws a seaborn FacetGrid.

    Parses the facet layout options, reads the input dataframe, resolves the
    ``--map`` plotting function and its ``--kwargs`` from their command-line
    text, maps the function over the grid, adds a legend and shows the plot.
    """
    msg = textwrap.dedent("""
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """)

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in')

    msg = 'Different values of this variable in separate rows'
    parser.add_argument('--row',
                        nargs=1,
                        type=str,
                        dest='row',
                        metavar='row',
                        help=msg)

    msg = 'Different values of this variable in separate columns'
    parser.add_argument('--col',
                        nargs=1,
                        type=str,
                        dest='col',
                        metavar='col',
                        help=msg)

    msg = 'Different values of this variable in separate colors'
    parser.add_argument('--hue',
                        nargs=1,
                        type=str,
                        dest='hue',
                        metavar='hue',
                        help=msg)

    msg = 'The aspect ratio of each plot'
    parser.add_argument('--aspect',
                        nargs=1,
                        type=float,
                        dest='aspect',
                        metavar='aspect',
                        default=[2],
                        help=msg)

    msg = 'The size of each plot (default=4)'
    parser.add_argument('--size',
                        nargs=1,
                        type=float,
                        dest='size',
                        metavar='size',
                        help=msg,
                        default=[4])

    msg = 'The plotting function to use for each facet'
    parser.add_argument('--map',
                        nargs=1,
                        type=str,
                        dest='map',
                        metavar='map',
                        required=True,
                        help=msg)

    msg = 'The args to pass to the plotting function'
    parser.add_argument('--args',
                        nargs='+',
                        type=str,
                        dest='args',
                        metavar='args',
                        required=True,
                        help=msg)

    msg = 'Plotting function kwargs expressed as \'a=1\' \'b=2\' ... '
    parser.add_argument('--kwargs',
                        nargs='+',
                        type=str,
                        dest='kwargs',
                        metavar='kwargs',
                        help=msg)

    msg = 'Share x axis'
    parser.add_argument('--sharex',
                        action='store_true',
                        dest='sharex',
                        default=False,
                        help=msg)

    msg = 'Share y axis'
    parser.add_argument('--sharey',
                        action='store_true',
                        dest='sharey',
                        default=False,
                        help=msg)

    msg = 'x axis limits when sharex=True'
    parser.add_argument('--xlim',
                        nargs=2,
                        type=float,
                        dest='xlim',
                        metavar='xlim',
                        help=msg)

    # the y limits pair with sharey (the help text used to say sharex)
    msg = 'y axis limits when sharey=True'
    parser.add_argument('--ylim',
                        nargs=2,
                        type=float,
                        dest='ylim',
                        metavar='ylim',
                        help=msg)

    msg = "Save the figure to this file"
    parser.add_argument('--savefig', nargs=1, type=str, help=msg)

    warnings.filterwarnings('ignore')
    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as one-element lists; unwrap them here
    # NOTE(review): newer seaborn releases name the 'size' keyword 'height'
    # -- confirm against the seaborn version this tool targets.
    facet_grid_kwargs = {
        'row': args.row[0] if args.row else None,
        'col': args.col[0] if args.col else None,
        'hue': args.hue[0] if args.hue else None,
        'aspect': args.aspect[0],
        'size': args.size[0],
        'sharex': args.sharex,
        'sharey': args.sharey,
        'xlim': args.xlim if args.xlim else None,
        'ylim': args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    map_func_name = args.map[0]

    # SECURITY: the --map text is executed as Python code.  Only pl and sns
    # are placed in its namespace, but this must never be fed untrusted input.
    scope = {'pl': pl, 'sns': sns, 'map_func_name': map_func_name}
    exec('map_func = {}'.format(map_func_name), scope)
    map_func = scope['map_func']

    map_args = args.args

    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            # SECURITY: each 'a=1' item is executed as Python code too; the
            # exec mutates the map_kwargs dict object in place.
            exec('map_kwargs.update(dict({}))'.format(kwarg))

    grid.map(map_func, *map_args, **map_kwargs)  # noqa  defined in exec above
    grid.add_legend()
    plot_lib.show(args)
Beispiel #9
0
 def test_show_calls_pylab_show(self, show_mock):
     """show() calls pylab.show() when no savefig target is given

     show_mock is presumably the patched pylab.show; the patch decorator
     is outside this view.
     """
     plot_lib.show(MagicMock(savefig=[]))
     self.assertTrue(show_mock.called)
Beispiel #10
0
def exec_plot_command(args, cmd, df):  # pragma: no cover
    """Execute ``cmd`` between plot styling and plot display.

    Plot styling is applied first, then ``cmd`` is handed to ``execute``
    with ``df`` passed as a scope entry, and finally the plot is refined
    and shown through ``plot_lib``.
    """
    # imported here rather than at module level, as in the original
    from pandashells.lib import plot_lib

    plot_lib.set_plot_styling(args)
    execute(cmd, scope_entries=dict(df=df))
    plot_lib.refine_plot(args)
    plot_lib.show(args)
Beispiel #11
0
def exec_plot_command(args, cmd, df):  # pragma: no cover
    """Run ``cmd`` through ``execute`` and show the resulting plot.

    The sequence is: apply plot styling, execute ``cmd`` with ``df``
    supplied as a scope entry, refine the plot, show it.
    """
    # imported here rather than at module level, as in the original
    from pandashells.lib import plot_lib

    plot_lib.set_plot_styling(args)

    entries = {'df': df}
    execute(cmd, scope_entries=entries)

    plot_lib.refine_plot(args)
    plot_lib.show(args)
Beispiel #12
0
def main():
    """Command-line entry point for a single-variable regression plot.

    Reads the input dataframe, fits a polynomial of the requested order to
    the ``-y`` column as a function of the ``-x`` column with ``np.polyfit``
    (used to build the legend label), then draws the data and fit with
    ``sns.regplot`` and shows the plot.
    """
    msg = textwrap.dedent(
        """
        Create a single variable regression plot of specified order.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to synthetic data with boostrap errors.
                p.linspace 0 10 20 \\
                | p.df 'df["y_true"] = .2 * df.x' \\
                       'df["noise"] = np.random.randn(20)' \\
                        'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y

            * Fit a quadratic to synthetic data with boostrap errors.
                p.linspace 0 10 40 \\
                | p.df 'df["y_true"] = .5 * df.x  + .3 * df.x ** 2'\\
                       'df["noise"] = np.random.randn(40)' \\
                        'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y --order 2

            * Fit sealevel data with no bootstrap
                p.example_data -d sealevel\\
                | p.regplot -x year -y sealevel_mm --n_boot 1


        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')

    # x is the regressor, so it is the independent variable
    # (the two help texts were previously swapped)
    msg = 'Column for independent variable'
    parser.add_argument('-x', nargs=1, type=str, dest='x', metavar='col',
                        help=msg, required=True)

    msg = 'Column for dependent variable'
    parser.add_argument('-y', nargs=1, type=str, dest='y',
                        metavar='col', help=msg, required=True)

    msg = 'The order of the polynomial to fit (default = 1)'
    parser.add_argument('--order', help=msg, nargs=1, default=[1], type=int)

    msg = 'Number of bootstrap samples for uncertainty region (default=1000)'
    parser.add_argument(
        '--n_boot', help=msg, nargs=1, default=[1000], type=int)

    parser.add_argument('-a', '--alpha', help='Set opacity',
                        nargs=1, default=[0.5], type=float)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # extract command line params
    x = df[args.x[0]].values
    y = df[args.y[0]].values

    # do a polyfit with the specified order
    coeffs = np.polyfit(x, y, args.order[0])

    label = make_label(coeffs, args.savefig)

    # pass x and y by keyword: newer seaborn releases no longer accept the
    # data vectors positionally, and keywords work on older releases as well
    sns.regplot(
        x=x, y=y, order=args.order[0], n_boot=args.n_boot[0],
        line_kws={'label': label, 'color': CC[2], 'alpha': .5},
        scatter_kws={'alpha': args.alpha[0], 'color': CC[0]})

    pl.legend(loc='best')
    pl.xlabel(args.x[0])
    pl.ylabel(args.y[0])
    plot_lib.refine_plot(args)
    plot_lib.show(args)
Beispiel #13
0
def main():
    """Command-line entry point for the empirical CDF tool.

    Evaluates ``ECDF`` of the chosen column on an evenly spaced grid between
    the column's minimum and maximum.  With ``--quiet`` the grid and the two
    probability columns are written out through ``io_lib.df_to_output``;
    otherwise both curves are plotted and shown.
    """
    description = textwrap.dedent(
        """
        Plots the emperical cumulative distribution function (ECDF).

        -----------------------------------------------------------------------
        Examples:

            * Plot ECDF for 10k samples from the standard normal distribution.
                p.rand -t normal -n 10000 | p.cdf -c c0

            * Instead of plotting, send ECDF values to stdout
                p.rand -t normal -n 10000 | p.cdf -c c0 -q | head
        -----------------------------------------------------------------------
        """
    )

    # build the command line parser
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # tool-specific options
    parser.add_argument(
        "-c", "--col", required=True, nargs=1,
        help="Column to plot distribution")
    parser.add_argument(
        '-n', '--n_points', nargs=1, type=int,
        help='Number of output points (default is twice input len)')
    parser.add_argument(
        '-q', '--quiet', action='store_true', default=False,
        help='Quiet mean no plots. Send numeric output to stdout instead')

    # shared option groups, then parse
    arg_lib.add_args(parser, 'decorating', 'io_in', 'io_out',)
    args = parser.parse_args()
    col_name = args.col[0]

    # pull the requested column out of the input dataframe
    df = io_lib.df_from_input(args)
    x = df[col_name].values

    # number of grid points: explicit request, else twice the input length
    if args.n_points is None:
        n_out = 2 * len(x)
    else:
        n_out = args.n_points[0]

    # evaluate the ECDF on the grid
    x_out = np.linspace(min(x), max(x), n_out)
    y_out = ECDF(x)(x_out)

    # quiet mode: emit the numbers and stop
    if args.quiet:
        columns = ['x', 'p_less', 'p_greater']
        df_out = pd.DataFrame(
            {'x': x_out, 'p_less': y_out, 'p_greater': 1 - y_out})[columns]
        io_lib.df_to_output(args, df_out)
        return

    # otherwise style and draw both curves
    plot_lib.set_plot_styling(args)
    pl.plot(x_out, y_out, label='P({} < x)'.format(col_name))
    pl.plot(x_out, 1. - y_out, label='P({} > x)'.format(col_name))
    pl.xlabel('x')
    pl.legend(loc='best')

    plot_lib.refine_plot(args)
    plot_lib.show(args)
Beispiel #14
0
def main():
    """Command-line entry point that draws a seaborn FacetGrid.

    Parses the facet layout options, reads the input dataframe, resolves the
    ``--map`` plotting function and its ``--kwargs`` from their command-line
    text, maps the function over the grid, adds a legend and shows the plot.
    """
    msg = textwrap.dedent(
        """
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in')

    msg = 'Different values of this variable in separate rows'
    parser.add_argument(
        '--row', nargs=1, type=str, dest='row', metavar='row', help=msg)

    msg = 'Different values of this variable in separate columns'
    parser.add_argument(
        '--col', nargs=1, type=str, dest='col', metavar='col', help=msg)

    msg = 'Different values of this variable in separate colors'
    parser.add_argument(
        '--hue', nargs=1, type=str, dest='hue', metavar='hue', help=msg)

    msg = 'The aspect ratio of each plot'
    parser.add_argument(
        '--aspect', nargs=1, type=float, dest='aspect', metavar='aspect',
        default=[2], help=msg)

    msg = 'The size of each plot (default=4)'
    parser.add_argument(
        '--size', nargs=1, type=float, dest='size', metavar='size',
        help=msg, default=[4])

    msg = 'The plotting function to use for each facet'
    parser.add_argument(
        '--map', nargs=1, type=str, dest='map', metavar='map', required=True,
        help=msg)

    msg = 'The args to pass to the plotting function'
    parser.add_argument(
        '--args', nargs='+', type=str, dest='args', metavar='args',
        required=True, help=msg)

    msg = 'Plotting function kwargs expressed as \'a=1\' \'b=2\' ... '
    parser.add_argument(
        '--kwargs', nargs='+', type=str, dest='kwargs',
        metavar='kwargs', help=msg)

    msg = 'Share x axis'
    parser.add_argument('--sharex', action='store_true', dest='sharex',
                        default=False, help=msg)

    msg = 'Share y axis'
    parser.add_argument('--sharey', action='store_true', dest='sharey',
                        default=False, help=msg)

    msg = 'x axis limits when sharex=True'
    parser.add_argument(
        '--xlim', nargs=2, type=float, dest='xlim', metavar='xlim', help=msg)

    # the y limits pair with sharey (the help text used to say sharex)
    msg = 'y axis limits when sharey=True'
    parser.add_argument(
        '--ylim', nargs=2, type=float, dest='ylim', metavar='ylim', help=msg)

    msg = "Save the figure to this file"
    parser.add_argument('--savefig', nargs=1, type=str, help=msg)

    warnings.filterwarnings('ignore')
    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as one-element lists; unwrap them here
    # NOTE(review): newer seaborn releases name the 'size' keyword 'height'
    # -- confirm against the seaborn version this tool targets.
    facet_grid_kwargs = {
        'row': args.row[0] if args.row else None,
        'col': args.col[0] if args.col else None,
        'hue': args.hue[0] if args.hue else None,
        'aspect': args.aspect[0],
        'size': args.size[0],
        'sharex': args.sharex,
        'sharey': args.sharey,
        'xlim': args.xlim if args.xlim else None,
        'ylim': args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    map_func_name = args.map[0]

    # SECURITY: the --map text is executed as Python code.  Only pl and sns
    # are placed in its namespace, but this must never be fed untrusted input.
    scope = {'pl': pl, 'sns': sns, 'map_func_name': map_func_name}
    exec('map_func = {}'.format(map_func_name), scope)
    map_func = scope['map_func']

    map_args = args.args

    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            # SECURITY: each 'a=1' item is executed as Python code too; the
            # exec mutates the map_kwargs dict object in place.
            exec('map_kwargs.update(dict({}))'.format(kwarg))

    grid.map(map_func, *map_args, **map_kwargs)  # noqa  defined in exec above
    grid.add_legend()
    plot_lib.show(args)
Beispiel #15
0
def main():
    """Command-line entry point that draws a seaborn FacetGrid.

    Parses the facet layout options, reads the input dataframe, resolves the
    ``--map`` plotting function and its ``--kwargs`` from their command-line
    text, maps the function over the grid, adds a legend and shows the plot.
    """
    msg = textwrap.dedent(
        """
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, "io_in")

    msg = "Different values of this variable in separate rows"
    parser.add_argument("--row", nargs=1, type=str, dest="row", metavar="row", help=msg)

    msg = "Different values of this variable in separate columns"
    parser.add_argument("--col", nargs=1, type=str, dest="col", metavar="col", help=msg)

    msg = "Different values of this variable in separate colors"
    parser.add_argument("--hue", nargs=1, type=str, dest="hue", metavar="hue", help=msg)

    msg = "The aspect ratio of each plot"
    parser.add_argument("--aspect", nargs=1, type=float, dest="aspect", metavar="aspect", default=[2], help=msg)

    msg = "The size of each plot (default=4)"
    parser.add_argument("--size", nargs=1, type=float, dest="size", metavar="size", help=msg, default=[4])

    msg = "The plotting function to use for each facet"
    parser.add_argument("--map", nargs=1, type=str, dest="map", metavar="map", required=True, help=msg)

    msg = "The args to pass to the plotting function"
    parser.add_argument("--args", nargs="+", type=str, dest="args", metavar="args", required=True, help=msg)

    msg = "Plotting function kwargs expressed as 'a=1' 'b=2' ... "
    parser.add_argument("--kwargs", nargs="+", type=str, dest="kwargs", metavar="kwargs", help=msg)

    msg = "Share x axis"
    parser.add_argument("--sharex", action="store_true", dest="sharex", default=False, help=msg)

    msg = "Share y axis"
    parser.add_argument("--sharey", action="store_true", dest="sharey", default=False, help=msg)

    msg = "x axis limits when sharex=True"
    parser.add_argument("--xlim", nargs=2, type=float, dest="xlim", metavar="xlim", help=msg)

    # the y limits pair with sharey (the help text used to say sharex)
    msg = "y axis limits when sharey=True"
    parser.add_argument("--ylim", nargs=2, type=float, dest="ylim", metavar="ylim", help=msg)

    msg = "Save the figure to this file"
    parser.add_argument("--savefig", nargs=1, type=str, help=msg)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as one-element lists; unwrap them here
    # NOTE(review): newer seaborn releases name the "size" keyword "height"
    # -- confirm against the seaborn version this tool targets.
    facet_grid_kwargs = {
        "row": args.row[0] if args.row else None,
        "col": args.col[0] if args.col else None,
        "hue": args.hue[0] if args.hue else None,
        "aspect": args.aspect[0],
        "size": args.size[0],
        "sharex": args.sharex,
        "sharey": args.sharey,
        "xlim": args.xlim if args.xlim else None,
        "ylim": args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    map_func_name = args.map[0]

    # SECURITY: the --map text is executed as Python code.  Only pl and sns
    # are placed in its namespace, but this must never be fed untrusted input.
    scope = {"pl": pl, "sns": sns, "map_func_name": map_func_name}
    exec("map_func = {}".format(map_func_name), scope)
    map_func = scope["map_func"]

    map_args = args.args

    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            # SECURITY: each 'a=1' item is executed as Python code too; the
            # exec mutates the map_kwargs dict object in place.
            exec("map_kwargs.update(dict({}))".format(kwarg))

    grid.map(map_func, *map_args, **map_kwargs)  # noqa  defined in exec above
    grid.add_legend()
    plot_lib.show(args)
Beispiel #16
0
def main():
    """Command-line entry point for formula-based linear regression.

    The model given with ``--model`` is fit with ``sm.ols`` against the
    input dataframe.  With ``--fit`` the input is written back out with
    ``fit_`` and ``resid_`` columns appended and nothing else happens.
    Otherwise the fit summary is written to stdout and, with ``--plot``,
    residual plots are shown.
    """
    description = textwrap.dedent("""
        Performs (multivariable) linear regression.  The fitting model
        is specified using the R-like, patsy syntax.  Input is from stdin
        and output is either fitting information or the input data
        with columns added for the fit and residuals.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to the sea-level data
                p.example_data -d sealevel | p.regress -m 'sealevel_mm ~ year'

            * Fit a trend plus annual cycle to sealevel data
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin'

            * Examine residual ECDF of trend plus annual fit
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin' --fit \\
                | p.cdf -c 'resid_' --title 'ECDF of trend + annual'

            * Detrend sealevel data to more clearly reveal oscillations
                p.example_data -d sealevel \\
                | p.regress -m 'sealevel_mm ~ year' --fit \\
                | p.plot -x year -y resid_ --ylabel 'Trend removed (mm)' \\
                         --title 'Global Sea Surface Height'

            * Set origin of sealevel data to 0 and regress with no intercept
                p.example_data -d sealevel\\
                | p.df 'df["year"] = df.year - df.year.iloc[0]'\\
                'df["sealevel_mm"] = df.sealevel_mm - df.sealevel_mm.iloc[0]'\\
                | p.regress -m 'sealevel_mm ~ year - 1' --fit\\
                | p.plot -x year -y sealevel_mm fit_ --style '.' '-'\\
                     --alpha .2 1 --legend best --title 'Force Zero Intercept'

        -----------------------------------------------------------------------
        """)

    # build the command line parser
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    # the regression model formula
    parser.add_argument(
        "-m", "--model", type=str, nargs=1, required=True,
        help="The model expressed in patsy syntax")

    # output mode switches
    parser.add_argument(
        "--fit", action="store_true", dest='retfit', default=False,
        help="Return input with fit and residual appended")
    parser.add_argument(
        "--plot", action="store_true", default=False,
        help="Make residual plots")

    args = parser.parse_args()
    formula = args.model[0]

    # load the data and run the regression
    df = io_lib.df_from_input(args)
    ols_fit = sm.ols(formula=formula, data=df).fit()
    df['fit_'] = ols_fit.fittedvalues
    df['resid_'] = ols_fit.resid

    # --fit: emit the augmented dataframe and stop
    if args.retfit:
        io_lib.df_to_output(args, df)
        return

    # default: report the fit summary on stdout
    sys.stdout.write('\n{}\n'.format(ols_fit.summary()))
    sys.stdout.flush()

    if not args.plot:
        return

    # top panel: residual against fitted value
    pl.subplot(211)
    pl.plot(df.fit_, df.resid_, '.', alpha=.5)
    pl.xlabel('Fit')
    pl.ylabel('Residual')
    pl.title(formula)

    # bottom panel: distribution of the residuals
    pl.subplot(212)
    sns.distplot(df.resid_, bins=50)
    pl.xlabel('Residual with R^2 = {:0.4f}'.format(ols_fit.rsquared))
    pl.ylabel('Counts')

    # these backends need the layout flag set instead of a direct call
    backend = mpl.get_backend().lower()
    if backend in ['agg', 'macosx']:
        pl.gcf().set_tight_layout(True)
    else:
        pl.gcf().tight_layout()

    plot_lib.show(args)
Beispiel #17
0
 def test_show_calls_pylab_show(self, show_mock):
     """show() calls pylab.show() when no savefig target is given

     show_mock is presumably the patched pylab.show; the patch decorator
     is outside this view.
     """
     mock_args = MagicMock(savefig=[])

     plot_lib.show(mock_args)

     self.assertTrue(show_mock.called)