def main():
    """Histogram dataframe columns, or emit bin counts in quiet mode.

    The parsed command line arguments drive everything.  When ``args.quiet``
    is set, the first selected column is binned with ``np.histogram`` and a
    dataframe with columns 'bins' (bin centers) and 'counts' is handed to
    ``io_lib.df_to_output``.  Otherwise the selected columns are drawn with
    ``DataFrame.hist`` and passed through the plot_lib refine/show helpers.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # default to the first column when none were requested
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(
            df[cols[0]], bins=nbins, range=range_tup, density=do_density)
        # midpoint of every bin: left edge plus half the bin width
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # `normed` was removed from matplotlib's hist; `density` is the
        # supported keyword and mirrors the np.histogram call above.
        df.hist(
            cols, bins=nbins, range=range_tup, alpha=alpha,
            sharex=sharex, sharey=sharey, layout=layout_tup,
            density=do_density)
        plot_lib.refine_plot(args)
        plot_lib.show(args)
def test_show_creates_png_file(self):
    """A file exists at the savefig png path after show() is called."""
    png_path = os.path.join(self.dir_name, 'plot.png')
    mock_args = MagicMock(savefig=[png_path])

    plot_lib.show(mock_args)

    file_was_written = os.path.isfile(png_path)
    self.assertTrue(file_was_written)
def test_show_creates_png_file(self):
    """After show() runs, the png named in args.savefig is on disk."""
    target = os.path.join(self.dir_name, "plot.png")

    plot_lib.show(MagicMock(savefig=[target]))

    exists = os.path.isfile(target)
    self.assertTrue(exists)
def test_show_creates_html_file(self):
    """show() saves an html file that contains the plot's x label."""
    file_name = os.path.join(self.dir_name, 'plot.html')
    args = MagicMock(savefig=[file_name])

    # label the current axes so there is a known string to look for
    xlabel = 'my_xlabel_string'
    pl.xlabel(xlabel)

    plot_lib.show(args)

    with open(file_name) as f:
        # assertIn names the missing substring on failure, which a bare
        # assertTrue(... in ...) cannot do
        self.assertIn(xlabel, f.read())
def test_show_creates_html_file(self):
    """show() saves an html file that contains the plot's x label."""
    file_name = os.path.join(self.dir_name, "plot.html")
    args = MagicMock(savefig=[file_name])

    # label the current axes so there is a known string to look for
    xlabel = "my_xlabel_string"
    pl.xlabel(xlabel)

    plot_lib.show(args)

    with open(file_name) as f:
        # assertIn names the missing substring on failure, which a bare
        # assertTrue(... in ...) cannot do
        self.assertIn(xlabel, f.read())
def main():
    """Histogram dataframe columns, or emit bin counts in quiet mode.

    With ``args.quiet`` set, only the first selected column is binned (via
    ``np.histogram``) and a dataframe of bin centers ('bins') and values
    ('counts') is passed to ``io_lib.df_to_output``.  Without it, the
    selected columns are plotted with ``DataFrame.hist`` and then handed to
    the plot_lib refine/show helpers.
    """
    args = get_input_args()
    df = io_lib.df_from_input(args)

    # extract parameters from arg parser
    nbins = args.nbins[0]
    range_tup = args.range
    layout_tup = args.layout
    alpha = args.alpha[0]
    do_density = args.density
    sharex = args.sharex
    sharey = args.sharey
    # fall back to the first column when no columns were named
    cols = args.cols if args.cols else [df.columns[0]]

    validate_args(args, cols, df)
    plot_lib.set_plot_styling(args)

    # no plotting if output requested
    if args.quiet:
        counts, edges = np.histogram(df[cols[0]], bins=nbins, range=range_tup, density=do_density)
        # bin centers are the left edges shifted by half a bin width
        centers = edges[:-1] + 0.5 * np.diff(edges)
        df_out = pd.DataFrame({'bins': centers, 'counts': counts})
        io_lib.df_to_output(args, df_out)

    # otherwise do plotting
    else:
        # matplotlib dropped the `normed` keyword from hist; `density` is its
        # replacement and matches the keyword used for np.histogram above.
        df.hist(
            cols,
            bins=nbins,
            range=range_tup,
            alpha=alpha,
            sharex=sharex,
            sharey=sharey,
            layout=layout_tup,
            density=do_density,
        )
        plot_lib.refine_plot(args)
        plot_lib.show(args)
def main():
    """Command line entry point: fit an OLS model given as a patsy formula.

    Depending on the flags, either the input frame with 'fit_' and 'resid_'
    columns appended is sent to the output, or the fit summary is printed
    (optionally followed by residual diagnostic plots).
    """
    description = textwrap.dedent(
        """
        Performs (multivariable) linear regression.  The fitting model
        is specified using the R-like, patsy syntax.  Input is from stdin
        and output is either fitting information or the input data
        with columns added for the fit and residuals.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to the sea-level data
                p.example_data -d sealevel | p.regress -m 'sealevel_mm ~ year'

            * Fit a trend plus annual cycle to sealevel data
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin'

            * Examine residual ECDF of trend plus annual fit
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin' --fit \\
                | p.cdf -c 'resid_' --title 'ECDF of trend + annual'

            * Detrend sealevel data to more clearly reveal oscillations
                p.example_data -d sealevel \\
                | p.regress -m 'sealevel_mm ~ year' --fit \\
                | p.plot -x year -y resid_ --ylabel 'Trend removed (mm)' \\
                         --title 'Global Sea Surface Height'

            * Set origin of sealevel data to 0 and regress with no intercept
                p.example_data -d sealevel\\
                | p.df 'df["year"] = df.year - df.year.iloc[0]'\\
                'df["sealevel_mm"] = df.sealevel_mm - df.sealevel_mm.iloc[0]'\\
                | p.regress -m 'sealevel_mm ~ year - 1' --fit\\
                | p.plot -x year -y sealevel_mm fit_ --style '.' '-'\\
                     --alpha .2 1 --legend best --title 'Force Zero Intercept'

        -----------------------------------------------------------------------
        """
    )

    # build the command line interface
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description)
    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    # the regression model itself is mandatory
    parser.add_argument(
        "-m", "--model", type=str, nargs=1, required=True,
        help="The model expressed in patsy syntax")
    parser.add_argument(
        "--fit", action="store_true", dest='retfit', default=False,
        help="Return input with fit and residual appended")
    parser.add_argument(
        "--plot", action="store_true", default=False,
        help="Make residual plots")

    args = parser.parse_args()
    formula = args.model[0]

    # load the data and run the regression
    df = io_lib.df_from_input(args)
    fitted = sm.ols(formula=formula, data=df).fit()
    df['fit_'] = fitted.fittedvalues
    df['resid_'] = fitted.resid

    # --fit short-circuits everything else: emit the augmented frame only
    if args.retfit:
        io_lib.df_to_output(args, df)
        return

    # otherwise report the fit summary on stdout
    sys.stdout.write('\n{}\n'.format(fitted.summary()))
    sys.stdout.flush()

    # nothing more to do unless residual plots were requested
    if not args.plot:
        return

    # top panel: residual against fitted value
    pl.subplot(211)
    pl.plot(df.fit_, df.resid_, '.', alpha=.5)
    pl.xlabel('Fit')
    pl.ylabel('Residual')
    pl.title(formula)

    # bottom panel: distribution of the residuals
    pl.subplot(212)
    sns.distplot(df.resid_, bins=50)
    pl.xlabel('Residual with R^2 = {:0.4f}'.format(fitted.rsquared))
    pl.ylabel('Counts')

    # the agg and macosx backends get the figure-level tight-layout flag
    # instead of an immediate tight_layout() call
    backend = mpl.get_backend().lower()
    figure = pl.gcf()
    if backend in ['agg', 'macosx']:
        figure.set_tight_layout(True)
    else:
        figure.tight_layout()

    plot_lib.show(args)
def main():
    """Command line entry point: draw a seaborn FacetGrid from the input.

    The plotting function (``--map``) and its keyword arguments
    (``--kwargs``) arrive as text on the command line and are evaluated as
    Python expressions; ``pl`` and ``sns`` are the names made available to
    the ``--map`` expression.
    """
    msg = textwrap.dedent("""
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """)

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in')

    msg = 'Different values of this variable in separate rows'
    parser.add_argument('--row', nargs=1, type=str, dest='row', metavar='row', help=msg)

    msg = 'Different values of this variable in separate columns'
    parser.add_argument('--col', nargs=1, type=str, dest='col', metavar='col', help=msg)

    msg = 'Different values of this variable in separate colors'
    parser.add_argument('--hue', nargs=1, type=str, dest='hue', metavar='hue', help=msg)

    msg = 'The aspect ratio of each plot'
    parser.add_argument('--aspect', nargs=1, type=float, dest='aspect', metavar='aspect', default=[2], help=msg)

    msg = 'The size of each plot (default=4)'
    parser.add_argument('--size', nargs=1, type=float, dest='size', metavar='size', help=msg, default=[4])

    msg = 'The plotting function to use for each facet'
    parser.add_argument('--map', nargs=1, type=str, dest='map', metavar='map', required=True, help=msg)

    msg = 'The args to pass to the plotting function'
    parser.add_argument('--args', nargs='+', type=str, dest='args', metavar='args', required=True, help=msg)

    msg = 'Plotting function kwargs expressed as \'a=1\' \'b=2\' ... '
    parser.add_argument('--kwargs', nargs='+', type=str, dest='kwargs', metavar='kwargs', help=msg)

    msg = 'Share x axis'
    parser.add_argument('--sharex', action='store_true', dest='sharex', default=False, help=msg)

    msg = 'Share y axis'
    parser.add_argument('--sharey', action='store_true', dest='sharey', default=False, help=msg)

    msg = 'x axis limits when sharex=True'
    parser.add_argument('--xlim', nargs=2, type=float, dest='xlim', metavar='xlim', help=msg)

    # help text previously said sharex here, a copy-paste slip from --xlim
    msg = 'y axis limits when sharey=True'
    parser.add_argument('--ylim', nargs=2, type=float, dest='ylim', metavar='ylim', help=msg)

    msg = "Save the figure to this file"
    parser.add_argument('--savefig', nargs=1, type=str, help=msg)

    # silences every warning for the remainder of the process
    warnings.filterwarnings('ignore')

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as one-element lists (or None when omitted)
    facet_grid_kwargs = {
        'row': args.row[0] if args.row else None,
        'col': args.col[0] if args.col else None,
        'hue': args.hue[0] if args.hue else None,
        'aspect': args.aspect[0],
        'size': args.size[0],
        'sharex': args.sharex,
        'sharey': args.sharey,
        'xlim': args.xlim if args.xlim else None,
        'ylim': args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    # NOTE(review): --map and --kwargs are evaluated as Python source taken
    # verbatim from the command line; never feed them from an untrusted origin.
    map_func = eval(args.map[0], {'pl': pl, 'sns': sns})

    map_args = args.args

    # each --kwargs token looks like 'name=value' and is expanded via dict()
    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            map_kwargs.update(eval('dict({})'.format(kwarg)))

    grid.map(map_func, *map_args, **map_kwargs)
    grid.add_legend()
    plot_lib.show(args)
def test_show_calls_pylab_show(self, show_mock):
    """With an empty savefig list, show() invokes the mocked show."""
    mock_args = MagicMock(savefig=[])

    plot_lib.show(mock_args)

    was_called = show_mock.called
    self.assertTrue(was_called)
def exec_plot_command(args, cmd, df):  # pragma: no cover
    """Run a plotting command between plot styling and display.

    ``cmd`` is passed to ``execute`` with ``df`` exposed under the name
    'df'.  plot_lib is imported at call time rather than at module level.
    """
    from pandashells.lib import plot_lib

    namespace = {'df': df}

    plot_lib.set_plot_styling(args)
    execute(cmd, scope_entries=namespace)
    plot_lib.refine_plot(args)
    plot_lib.show(args)
def main():
    """Command line entry point: polynomial regression plot of y against x.

    Fits a polynomial of the requested order with ``np.polyfit`` (used only
    to build the legend label) and draws the data, fit and bootstrap
    uncertainty band with ``sns.regplot``.
    """
    msg = textwrap.dedent(
        """
        Create a single variable regression plot of specified order.

        -----------------------------------------------------------------------
        Examples:

            * Fit a line to synthetic data with bootstrap errors.
                p.linspace 0 10 20 \\
                | p.df 'df["y_true"] = .2 * df.x' \\
                       'df["noise"] = np.random.randn(20)' \\
                       'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y

            * Fit a quadratic to synthetic data with bootstrap errors.
                p.linspace 0 10 40 \\
                | p.df 'df["y_true"] = .5 * df.x  + .3 * df.x ** 2'\\
                       'df["noise"] = np.random.randn(40)' \\
                       'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y --order 2

            * Fit sealevel data with no bootstrap
                p.example_data -d sealevel\\
                | p.regplot -x year -y sealevel_mm --n_boot 1

        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')

    # x is the independent variable and y the dependent one; the two help
    # strings used to be swapped
    msg = 'Column for independent variable'
    parser.add_argument('-x', nargs=1, type=str, dest='x', metavar='col',
                        help=msg, required=True)

    msg = 'Column for dependent variable'
    parser.add_argument('-y', nargs=1, type=str, dest='y', metavar='col',
                        help=msg, required=True)

    msg = 'The order of the polynomial to fit (default = 1)'
    parser.add_argument('--order', help=msg, nargs=1, default=[1], type=int)

    msg = 'Number of bootstrap samples for uncertainty region (default=1000)'
    parser.add_argument(
        '--n_boot', help=msg, nargs=1, default=[1000], type=int)

    parser.add_argument('-a', '--alpha', help='Set opacity',
                        nargs=1, default=[0.5], type=float)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # extract command line params
    x = df[args.x[0]].values
    y = df[args.y[0]].values

    # do a polyfit with the specified order
    coeffs = np.polyfit(x, y, args.order[0])

    label = make_label(coeffs, args.savefig)

    # pass x and y by keyword: newer seaborn releases make them keyword-only,
    # and the keyword form is accepted by older releases as well
    sns.regplot(
        x=x, y=y, order=args.order[0], n_boot=args.n_boot[0],
        line_kws={'label': label, 'color': CC[2], 'alpha': .5},
        scatter_kws={'alpha': args.alpha[0], 'color': CC[0]})

    pl.legend(loc='best')
    pl.xlabel(args.x[0])
    pl.ylabel(args.y[0])
    plot_lib.refine_plot(args)
    plot_lib.show(args)
def main():
    """Command line entry point: plot or print an empirical CDF.

    The ECDF of the selected column is evaluated on an evenly spaced grid
    spanning the column's minimum to maximum.  With ``--quiet`` the grid and
    probabilities are handed to ``io_lib.df_to_output``; otherwise both
    P(col < x) and P(col > x) are plotted.
    """
    msg = textwrap.dedent(
        """
        Plots the empirical cumulative distribution function (ECDF).

        -----------------------------------------------------------------------
        Examples:

            * Plot ECDF for 10k samples from the standard normal distribution.
                p.rand -t normal -n 10000 | p.cdf -c c0

            * Instead of plotting, send ECDF values to stdout
                p.rand -t normal -n 10000 | p.cdf -c c0 -q | head

        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    # specify column to use
    parser.add_argument(
        "-c", "--col", required=True, nargs=1,
        help="Column to plot distribution")
    parser.add_argument(
        '-n', '--n_points', nargs=1, type=int,
        help='Number of output points (default is twice input len)')
    parser.add_argument(
        '-q', '--quiet', action='store_true', default=False,
        help='Quiet means no plots. Send numeric output to stdout instead')

    # add the shared option groups, then parse arguments
    arg_lib.add_args(parser, 'decorating', 'io_in', 'io_out',)
    args = parser.parse_args()

    # get the input dataframe and extract column
    df = io_lib.df_from_input(args)
    x = df[args.col[0]].values

    # create the output distribution
    n_out = 2 * len(x) if args.n_points is None else args.n_points[0]
    # use the array's own reductions instead of iterating it element by
    # element with the builtin min/max
    x_out = np.linspace(x.min(), x.max(), n_out)
    y_out = ECDF(x)(x_out)

    # send values to stdout if quiet specified
    if args.quiet:
        df_out = pd.DataFrame(
            {'x': x_out, 'p_less': y_out, 'p_greater': 1 - y_out})
        # fix the column order of the emitted frame
        df_out = df_out[['x', 'p_less', 'p_greater']]
        io_lib.df_to_output(args, df_out)
        return

    # set the appropriate theme and make plot
    plot_lib.set_plot_styling(args)
    pl.plot(x_out, y_out, label='P({} < x)'.format(args.col[0]))
    pl.plot(x_out, 1. - y_out, label='P({} > x)'.format(args.col[0]))
    pl.xlabel('x')
    pl.legend(loc='best')

    plot_lib.refine_plot(args)
    plot_lib.show(args)
def main():
    """Command line entry point: draw a seaborn FacetGrid from the input.

    The plotting function (``--map``) and its keyword arguments
    (``--kwargs``) arrive as text on the command line and are evaluated as
    Python expressions; ``pl`` and ``sns`` are the names made available to
    the ``--map`` expression.
    """
    msg = textwrap.dedent(
        """
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in')

    msg = 'Different values of this variable in separate rows'
    parser.add_argument(
        '--row', nargs=1, type=str, dest='row', metavar='row', help=msg)

    msg = 'Different values of this variable in separate columns'
    parser.add_argument(
        '--col', nargs=1, type=str, dest='col', metavar='col', help=msg)

    msg = 'Different values of this variable in separate colors'
    parser.add_argument(
        '--hue', nargs=1, type=str, dest='hue', metavar='hue', help=msg)

    msg = 'The aspect ratio of each plot'
    parser.add_argument(
        '--aspect', nargs=1, type=float, dest='aspect', metavar='aspect',
        default=[2], help=msg)

    msg = 'The size of each plot (default=4)'
    parser.add_argument(
        '--size', nargs=1, type=float, dest='size', metavar='size',
        help=msg, default=[4])

    msg = 'The plotting function to use for each facet'
    parser.add_argument(
        '--map', nargs=1, type=str, dest='map', metavar='map',
        required=True, help=msg)

    msg = 'The args to pass to the plotting function'
    parser.add_argument(
        '--args', nargs='+', type=str, dest='args', metavar='args',
        required=True, help=msg)

    msg = 'Plotting function kwargs expressed as \'a=1\' \'b=2\' ... '
    parser.add_argument(
        '--kwargs', nargs='+', type=str, dest='kwargs',
        metavar='kwargs', help=msg)

    msg = 'Share x axis'
    parser.add_argument('--sharex', action='store_true', dest='sharex',
                        default=False, help=msg)

    msg = 'Share y axis'
    parser.add_argument('--sharey', action='store_true', dest='sharey',
                        default=False, help=msg)

    msg = 'x axis limits when sharex=True'
    parser.add_argument(
        '--xlim', nargs=2, type=float, dest='xlim', metavar='xlim', help=msg)

    # help text previously said sharex here, a copy-paste slip from --xlim
    msg = 'y axis limits when sharey=True'
    parser.add_argument(
        '--ylim', nargs=2, type=float, dest='ylim', metavar='ylim', help=msg)

    msg = "Save the figure to this file"
    parser.add_argument('--savefig', nargs=1, type=str, help=msg)

    # silences every warning for the remainder of the process
    warnings.filterwarnings('ignore')

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as one-element lists (or None when omitted)
    facet_grid_kwargs = {
        'row': args.row[0] if args.row else None,
        'col': args.col[0] if args.col else None,
        'hue': args.hue[0] if args.hue else None,
        'aspect': args.aspect[0],
        'size': args.size[0],
        'sharex': args.sharex,
        'sharey': args.sharey,
        'xlim': args.xlim if args.xlim else None,
        'ylim': args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    # NOTE(review): --map and --kwargs are evaluated as Python source taken
    # verbatim from the command line; never feed them from an untrusted origin.
    map_func = eval(args.map[0], {'pl': pl, 'sns': sns})

    map_args = args.args

    # each --kwargs token looks like 'name=value' and is expanded via dict()
    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            map_kwargs.update(eval('dict({})'.format(kwarg)))

    grid.map(map_func, *map_args, **map_kwargs)
    grid.add_legend()
    plot_lib.show(args)
def main():
    """Command line entry point: draw a seaborn FacetGrid from the input.

    The plotting function (``--map``) and its keyword arguments
    (``--kwargs``) arrive as text on the command line and are evaluated as
    Python expressions; ``pl`` and ``sns`` are the names made available to
    the ``--map`` expression.
    """
    msg = textwrap.dedent(
        """
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, "io_in")

    msg = "Different values of this variable in separate rows"
    parser.add_argument("--row", nargs=1, type=str, dest="row", metavar="row", help=msg)

    msg = "Different values of this variable in separate columns"
    parser.add_argument("--col", nargs=1, type=str, dest="col", metavar="col", help=msg)

    msg = "Different values of this variable in separate colors"
    parser.add_argument("--hue", nargs=1, type=str, dest="hue", metavar="hue", help=msg)

    msg = "The aspect ratio of each plot"
    parser.add_argument("--aspect", nargs=1, type=float, dest="aspect", metavar="aspect", default=[2], help=msg)

    msg = "The size of each plot (default=4)"
    parser.add_argument("--size", nargs=1, type=float, dest="size", metavar="size", help=msg, default=[4])

    msg = "The plotting function to use for each facet"
    parser.add_argument("--map", nargs=1, type=str, dest="map", metavar="map", required=True, help=msg)

    msg = "The args to pass to the plotting function"
    parser.add_argument("--args", nargs="+", type=str, dest="args", metavar="args", required=True, help=msg)

    msg = "Plotting function kwargs expressed as 'a=1' 'b=2' ... "
    parser.add_argument("--kwargs", nargs="+", type=str, dest="kwargs", metavar="kwargs", help=msg)

    msg = "Share x axis"
    parser.add_argument("--sharex", action="store_true", dest="sharex", default=False, help=msg)

    msg = "Share y axis"
    parser.add_argument("--sharey", action="store_true", dest="sharey", default=False, help=msg)

    msg = "x axis limits when sharex=True"
    parser.add_argument("--xlim", nargs=2, type=float, dest="xlim", metavar="xlim", help=msg)

    # help text previously said sharex here, a copy-paste slip from --xlim
    msg = "y axis limits when sharey=True"
    parser.add_argument("--ylim", nargs=2, type=float, dest="ylim", metavar="ylim", help=msg)

    msg = "Save the figure to this file"
    parser.add_argument("--savefig", nargs=1, type=str, help=msg)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as one-element lists (or None when omitted)
    facet_grid_kwargs = {
        "row": args.row[0] if args.row else None,
        "col": args.col[0] if args.col else None,
        "hue": args.hue[0] if args.hue else None,
        "aspect": args.aspect[0],
        "size": args.size[0],
        "sharex": args.sharex,
        "sharey": args.sharey,
        "xlim": args.xlim if args.xlim else None,
        "ylim": args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    # NOTE(review): --map and --kwargs are evaluated as Python source taken
    # verbatim from the command line; never feed them from an untrusted origin.
    map_func = eval(args.map[0], {"pl": pl, "sns": sns})

    map_args = args.args

    # each --kwargs token looks like 'name=value' and is expanded via dict()
    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            map_kwargs.update(eval("dict({})".format(kwarg)))

    grid.map(map_func, *map_args, **map_kwargs)
    grid.add_legend()
    plot_lib.show(args)
def main():
    """Command line entry point: ordinary least squares from a patsy formula.

    Adds 'fit_' and 'resid_' columns to the input frame.  With ``--fit`` the
    augmented frame is sent to the output and nothing else happens; without
    it the fit summary is written to stdout and, with ``--plot``, residual
    diagnostics are drawn.
    """
    help_text = textwrap.dedent("""
        Performs (multivariable) linear regression.  The fitting model
        is specified using the R-like, patsy syntax.  Input is from stdin
        and output is either fitting information or the input data
        with columns added for the fit and residuals.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to the sea-level data
                p.example_data -d sealevel | p.regress -m 'sealevel_mm ~ year'

            * Fit a trend plus annual cycle to sealevel data
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin'

            * Examine residual ECDF of trend plus annual fit
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin' --fit \\
                | p.cdf -c 'resid_' --title 'ECDF of trend + annual'

            * Detrend sealevel data to more clearly reveal oscillations
                p.example_data -d sealevel \\
                | p.regress -m 'sealevel_mm ~ year' --fit \\
                | p.plot -x year -y resid_ --ylabel 'Trend removed (mm)' \\
                         --title 'Global Sea Surface Height'

            * Set origin of sealevel data to 0 and regress with no intercept
                p.example_data -d sealevel\\
                | p.df 'df["year"] = df.year - df.year.iloc[0]'\\
                'df["sealevel_mm"] = df.sealevel_mm - df.sealevel_mm.iloc[0]'\\
                | p.regress -m 'sealevel_mm ~ year - 1' --fit\\
                | p.plot -x year -y sealevel_mm fit_ --style '.' '-'\\
                     --alpha .2 1 --legend best --title 'Force Zero Intercept'

        -----------------------------------------------------------------------
        """)

    # assemble the parser: shared option groups first, then local flags
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=help_text)
    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    parser.add_argument("-m", "--model", type=str, nargs=1, required=True,
                        help="The model expressed in patsy syntax")
    fit_help = "Return input with fit and residual appended"
    parser.add_argument("--fit", action="store_true", dest='retfit',
                        default=False, help=fit_help)
    parser.add_argument("--plot", action="store_true",
                        default=False, help="Make residual plots")

    args = parser.parse_args()
    model_spec = args.model[0]

    # read the frame, fit, and attach fitted values and residuals
    df = io_lib.df_from_input(args)
    ols_fit = sm.ols(formula=model_spec, data=df).fit()
    df['fit_'] = ols_fit.fittedvalues
    df['resid_'] = ols_fit.resid

    if args.retfit:
        # --fit takes precedence: output the frame and stop
        io_lib.df_to_output(args, df)
        return

    # summary report goes straight to stdout
    sys.stdout.write('\n{}\n'.format(ols_fit.summary()))
    sys.stdout.flush()

    if args.plot:
        # upper axes: residuals versus fitted values
        pl.subplot(211)
        pl.plot(df.fit_, df.resid_, '.', alpha=.5)
        pl.xlabel('Fit')
        pl.ylabel('Residual')
        pl.title(model_spec)

        # lower axes: residual distribution
        pl.subplot(212)
        sns.distplot(df.resid_, bins=50)
        r_squared_label = 'Residual with R^2 = {:0.4f}'.format(
            ols_fit.rsquared)
        pl.xlabel(r_squared_label)
        pl.ylabel('Counts')

        # agg / macosx backends need the figure-level flag rather than an
        # immediate tight_layout() call
        needs_flag = mpl.get_backend().lower() in ['agg', 'macosx']
        if needs_flag:
            pl.gcf().set_tight_layout(True)
        else:
            pl.gcf().tight_layout()

        plot_lib.show(args)