Ejemplo n.º 1
0
def main():
    """Histogram the selected input columns, or emit bin counts when quiet.

    Command-line arguments come from ``get_input_args`` and the input
    dataframe from ``io_lib.df_from_input``.  When ``args.quiet`` is set, a
    numpy histogram of the first selected column is computed and a dataframe
    with ``bins`` (bin centers) and ``counts`` columns is handed to
    ``io_lib.df_to_output``.  Otherwise the selected columns are drawn with
    ``DataFrame.hist`` and the figure is passed through
    ``plot_lib.refine_plot`` and ``plot_lib.show``.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # fall back to the first dataframe column when none were specified
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(
            df[cols[0]], bins=nbins, range=range_tup, density=do_density)
        # shift each left edge by half its bin width to get the bin center
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # ``density`` replaces the ``normed`` keyword, which matplotlib
        # deprecated in 2.1 and removed in 3.1.
        df.hist(cols, bins=nbins, range=range_tup,
                alpha=alpha, sharex=sharex, sharey=sharey, layout=layout_tup,
                density=do_density)

        plot_lib.refine_plot(args)
        plot_lib.show(args)
Ejemplo n.º 2
0
def main():
    """Plot histograms of the requested columns or output binned counts.

    The parsed arguments (``get_input_args``) and the input dataframe
    (``io_lib.df_from_input``) drive two modes:

    * ``args.quiet`` set: ``np.histogram`` is run on the first selected
      column and a dataframe of bin centers (``bins``) and ``counts`` is
      passed to ``io_lib.df_to_output``; nothing is plotted.
    * otherwise: ``DataFrame.hist`` draws the selected columns, then
      ``plot_lib.refine_plot`` and ``plot_lib.show`` are called.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # use the first dataframe column if no columns were given
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(df[cols[0]],
                                     bins=nbins,
                                     range=range_tup,
                                     density=do_density)
        # bin center = left edge + half of the bin width
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # matplotlib removed the ``normed`` keyword in 3.1 (deprecated since
        # 2.1); ``density`` is the supported spelling.
        df.hist(cols,
                bins=nbins,
                range=range_tup,
                alpha=alpha,
                sharex=sharex,
                sharey=sharey,
                layout=layout_tup,
                density=do_density)

        plot_lib.refine_plot(args)
        plot_lib.show(args)
Ejemplo n.º 3
0
def exec_plot_command(args, cmd, df):  # pragma: no cover
    """Execute a plotting command against ``df`` and show the result.

    Plot styling is configured from ``args`` first; ``cmd`` is then passed
    to ``execute`` with the dataframe supplied as the ``df`` scope entry,
    after which the plot is refined and shown according to ``args``.
    """
    # function-scope import: plot_lib is only loaded when this runs
    from pandashells.lib import plot_lib

    scope = {'df': df}

    plot_lib.set_plot_styling(args)
    execute(cmd, scope_entries=scope)
    plot_lib.refine_plot(args)
    plot_lib.show(args)
Ejemplo n.º 4
0
def main():
    """Fit and plot a single-variable polynomial regression.

    Parses the command line, reads the input dataframe, fits a polynomial of
    the requested order to the ``-x``/``-y`` columns with ``np.polyfit``
    (the coefficients are only used to build the legend label), and draws
    the data and fit with ``sns.regplot`` before refining and showing the
    figure through ``plot_lib``.
    """
    msg = textwrap.dedent(
        """
        Create a single variable regression plot of specified order.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to synthetic data with bootstrap errors.
                p.linspace 0 10 20 \\
                | p.df 'df["y_true"] = .2 * df.x' \\
                       'df["noise"] = np.random.randn(20)' \\
                        'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y

            * Fit a quadratic to synthetic data with bootstrap errors.
                p.linspace 0 10 40 \\
                | p.df 'df["y_true"] = .5 * df.x  + .3 * df.x ** 2'\\
                       'df["noise"] = np.random.randn(40)' \\
                        'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y --order 2

            * Fit sealevel data with no bootstrap
                p.example_data -d sealevel\\
                | p.regplot -x year -y sealevel_mm --n_boot 1


        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')

    # x is the regressor (independent variable); y is the response
    msg = 'Column for independent variable'
    parser.add_argument('-x', nargs=1, type=str, dest='x', metavar='col',
                        help=msg, required=True)

    msg = 'Column for dependent variable'
    parser.add_argument('-y', nargs=1, type=str, dest='y',
                        metavar='col', help=msg, required=True)

    msg = 'The order of the polynomial to fit (default = 1)'
    parser.add_argument('--order', help=msg, nargs=1, default=[1], type=int)

    msg = 'Number of bootstrap samples for uncertainty region (default=1000)'
    parser.add_argument(
        '--n_boot', help=msg, nargs=1, default=[1000], type=int)

    parser.add_argument('-a', '--alpha', help='Set opacity',
                        nargs=1, default=[0.5], type=float)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # extract command line params
    x = df[args.x[0]].values
    y = df[args.y[0]].values

    # do a polyfit with the specified order
    coeffs = np.polyfit(x, y, args.order[0])

    label = make_label(coeffs, args.savefig)

    # Pass the data by keyword: recent seaborn releases take ``data`` as the
    # first positional parameter and make ``x``/``y`` keyword-only.
    sns.regplot(
        x=x, y=y, order=args.order[0], n_boot=args.n_boot[0],
        line_kws={'label': label, 'color': CC[2], 'alpha': .5},
        scatter_kws={'alpha': args.alpha[0], 'color': CC[0]})

    pl.legend(loc='best')
    pl.xlabel(args.x[0])
    pl.ylabel(args.y[0])
    plot_lib.refine_plot(args)
    plot_lib.show(args)
Ejemplo n.º 5
0
def main():
    """Plot, or print, the empirical CDF of one input column.

    Parses the command line, reads the input dataframe and evaluates the
    ECDF of the ``--col`` column on an evenly spaced grid spanning the
    column's minimum to maximum.  The grid has ``--n_points`` points, or
    twice the input length when that option is omitted.  With ``--quiet``
    the grid and probabilities are handed to ``io_lib.df_to_output`` and
    the function returns without plotting; otherwise both ``P(col < x)``
    and ``P(col > x)`` curves are plotted.
    """
    msg = textwrap.dedent(
        """
        Plots the empirical cumulative distribution function (ECDF).

        -----------------------------------------------------------------------
        Examples:

            * Plot ECDF for 10k samples from the standard normal distribution.
                p.rand -t normal -n 10000 | p.cdf -c c0

            * Instead of plotting, send ECDF values to stdout
                p.rand -t normal -n 10000 | p.cdf -c c0 -q | head
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    # specify column to use
    parser.add_argument(
        "-c", "--col", required=True, nargs=1,
        help="Column to plot distribution")
    parser.add_argument(
        '-n', '--n_points', nargs=1, type=int,
        help='Number of output points (default is twice input len)')
    parser.add_argument(
        '-q', '--quiet', action='store_true', default=False,
        help='Quiet means no plots. Send numeric output to stdout instead')

    # parse arguments
    arg_lib.add_args(parser, 'decorating', 'io_in', 'io_out',)
    args = parser.parse_args()

    # get the input dataframe and extract column
    df = io_lib.df_from_input(args)
    x = df[args.col[0]].values

    # create the output distribution
    n_out = 2 * len(x) if args.n_points is None else args.n_points[0]
    # numpy reductions instead of the builtins: the builtins walk the array
    # element by element in Python and give order-dependent answers with NaN
    x_out = np.linspace(np.min(x), np.max(x), n_out)
    y_out = ECDF(x)(x_out)

    # send values to stdout if quiet specified
    if args.quiet:
        df_out = pd.DataFrame(
            {'x': x_out, 'p_less': y_out, 'p_greater': 1 - y_out})
        # fix the column order of the output
        df_out = df_out[['x', 'p_less', 'p_greater']]
        io_lib.df_to_output(args, df_out)
        return

    # set the appropriate theme and make plot
    plot_lib.set_plot_styling(args)
    pl.plot(x_out, y_out, label='P({} < x)'.format(args.col[0]))
    pl.plot(x_out, 1. - y_out, label='P({} > x)'.format(args.col[0]))
    pl.xlabel('x')
    pl.legend(loc='best')

    plot_lib.refine_plot(args)
    plot_lib.show(args)
Ejemplo n.º 6
0
def exec_plot_command(args, cmd, df):  # pragma: no cover
    """Style the plot, run ``cmd`` via ``execute``, then refine and show it.

    The dataframe is supplied to ``execute`` as the ``df`` entry of
    ``scope_entries``; ``args`` is forwarded to each ``plot_lib`` call.
    """
    # local import keeps plot_lib from loading until this function is called
    from pandashells.lib import plot_lib

    plot_lib.set_plot_styling(args)

    scope_entries = dict(df=df)
    execute(cmd, scope_entries=scope_entries)

    plot_lib.refine_plot(args)
    plot_lib.show(args)