def main():
    """Histogram one or more columns of the input dataframe.

    Command-line options come from ``get_input_args`` and the dataframe from
    ``io_lib.df_from_input``.  When ``args.quiet`` is set, nothing is drawn:
    bin centers and counts for the first selected column are written with
    ``io_lib.df_to_output``.  Otherwise every selected column is plotted with
    ``DataFrame.hist``.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # fall back to the first column when none was requested
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(
            df[cols[0]], bins=nbins, range=range_tup, density=do_density)
        # report the midpoint of each bin rather than its edges
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # Extra keywords are forwarded to matplotlib's hist(), which no
        # longer accepts ``normed``; ``density`` is the supported spelling.
        df.hist(
            cols, bins=nbins, range=range_tup, alpha=alpha,
            sharex=sharex, sharey=sharey, layout=layout_tup,
            density=do_density)

        plot_lib.refine_plot(args)
        plot_lib.show(args)
def main():
    """Histogram one or more columns of the input dataframe.

    Parses the command line with ``get_input_args`` and loads the dataframe
    with ``io_lib.df_from_input``.  With ``args.quiet`` set, the histogram of
    the first selected column is computed with ``np.histogram`` and written
    out as a ``bins``/``counts`` table; otherwise all selected columns are
    drawn with ``DataFrame.hist``.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # default to the first dataframe column if no columns were given
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(
            df[cols[0]], bins=nbins, range=range_tup, density=do_density)
        # left edge plus half the bin width gives each bin's center
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # ``normed`` was removed from matplotlib's hist(); pass ``density``
        # so the normalization flag keeps working on current releases.
        df.hist(
            cols, bins=nbins, range=range_tup, alpha=alpha,
            sharex=sharex, sharey=sharey, layout=layout_tup,
            density=do_density)

        plot_lib.refine_plot(args)
        plot_lib.show(args)
def exec_plot_command(args, cmd, df):  # pragma: no cover
    """Execute ``cmd`` between plot styling and plot finalization.

    ``cmd`` is handed to ``execute`` with ``df`` made available under the
    name ``'df'`` through ``scope_entries``.  Styling is applied with
    ``plot_lib.set_plot_styling`` beforehand; afterwards the plot is passed
    through ``plot_lib.refine_plot`` and ``plot_lib.show``.
    """
    # plot_lib is imported at call time, not at module import time
    from pandashells.lib import plot_lib

    plot_lib.set_plot_styling(args)

    scope = {'df': df}
    execute(cmd, scope_entries=scope)

    plot_lib.refine_plot(args)
    plot_lib.show(args)
def main():
    """Draw a single-variable polynomial regression plot.

    Reads a dataframe from the configured input, fits a polynomial of the
    requested order with ``np.polyfit`` to build the legend label, and draws
    the scatter plus fitted curve with ``seaborn.regplot``.
    """
    msg = textwrap.dedent(
        """
        Create a single variable regression plot of specified order.

        -----------------------------------------------------------------------
        Examples:

            * Fit a line to synthetic data with bootstrap errors.
                p.linspace 0 10 20 \\
                | p.df 'df["y_true"] = .2 * df.x' \\
                       'df["noise"] = np.random.randn(20)' \\
                       'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y

            * Fit a quadratic to synthetic data with bootstrap errors.
                p.linspace 0 10 40 \\
                | p.df 'df["y_true"] = .5 * df.x + .3 * df.x ** 2'\\
                       'df["noise"] = np.random.randn(40)' \\
                       'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y --order 2

            * Fit sealevel data with no bootstrap
                p.example_data -d sealevel\\
                | p.regplot -x year -y sealevel_mm --n_boot 1

        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')

    # x is the independent variable and y the dependent one
    msg = 'Column for independent variable'
    parser.add_argument('-x', nargs=1, type=str, dest='x', metavar='col',
                        help=msg, required=True)

    msg = 'Column for dependent variable'
    parser.add_argument('-y', nargs=1, type=str, dest='y', metavar='col',
                        help=msg, required=True)

    msg = 'The order of the polynomial to fit (default = 1)'
    parser.add_argument('--order', help=msg, nargs=1, default=[1], type=int)

    msg = 'Number of bootstrap samples for uncertainty region (default=1000)'
    parser.add_argument(
        '--n_boot', help=msg, nargs=1, default=[1000], type=int)

    parser.add_argument('-a', '--alpha', help='Set opacity',
                        nargs=1, default=[0.5], type=float)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # extract command line params
    x = df[args.x[0]].values
    y = df[args.y[0]].values

    # do a polyfit with the specified order
    coeffs = np.polyfit(x, y, args.order[0])

    label = make_label(coeffs, args.savefig)

    # Pass x and y by keyword: newer seaborn releases make them keyword-only,
    # and the keyword form is also accepted by older releases.
    sns.regplot(
        x=x,
        y=y,
        order=args.order[0],
        n_boot=args.n_boot[0],
        line_kws={'label': label, 'color': CC[2], 'alpha': .5},
        scatter_kws={'alpha': args.alpha[0], 'color': CC[0]})

    pl.legend(loc='best')
    pl.xlabel(args.x[0])
    pl.ylabel(args.y[0])
    plot_lib.refine_plot(args)
    plot_lib.show(args)
def main():
    """Plot, or print, the empirical CDF of one input column.

    The ECDF of the selected column is evaluated on an evenly spaced grid
    spanning the column's minimum to its maximum.  With ``--quiet`` the grid
    and the two tail probabilities are written with ``io_lib.df_to_output``;
    otherwise both curves are plotted.
    """
    msg = textwrap.dedent(
        """
        Plots the empirical cumulative distribution function (ECDF).

        -----------------------------------------------------------------------
        Examples:

            * Plot ECDF for 10k samples from the standard normal distribution.
                p.rand -t normal -n 10000 | p.cdf -c c0

            * Instead of plotting, send ECDF values to stdout
                p.rand -t normal -n 10000 | p.cdf -c c0 -q | head

        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    # specify column to use
    parser.add_argument(
        "-c", "--col", required=True, nargs=1,
        help="Column to plot distribution")
    parser.add_argument(
        '-n', '--n_points', nargs=1, type=int,
        help='Number of output points (default is twice input len)')
    parser.add_argument(
        '-q', '--quiet', action='store_true', default=False,
        help='Quiet means no plots. Send numeric output to stdout instead')

    # add the shared option groups, then parse arguments
    arg_lib.add_args(parser, 'decorating', 'io_in', 'io_out')
    args = parser.parse_args()

    # get the input dataframe and extract column
    df = io_lib.df_from_input(args)
    x = df[args.col[0]].values

    # create the output distribution
    n_out = 2 * len(x) if args.n_points is None else args.n_points[0]
    # NumPy reductions avoid iterating the array element by element
    x_out = np.linspace(np.min(x), np.max(x), n_out)
    y_out = ECDF(x)(x_out)

    # send values to stdout if quiet specified
    if args.quiet:
        df_out = pd.DataFrame(
            {'x': x_out, 'p_less': y_out, 'p_greater': 1 - y_out})
        # fix the column order of the emitted table
        df_out = df_out[['x', 'p_less', 'p_greater']]
        io_lib.df_to_output(args, df_out)
        return

    # set the appropriate theme and make plot
    plot_lib.set_plot_styling(args)
    pl.plot(x_out, y_out, label='P({} < x)'.format(args.col[0]))
    pl.plot(x_out, 1. - y_out, label='P({} > x)'.format(args.col[0]))
    pl.xlabel('x')
    pl.legend(loc='best')

    plot_lib.refine_plot(args)
    plot_lib.show(args)