Example #1
0
def main():
    """Entry point for p.format: render one output string per dataframe row."""
    description = textwrap.dedent(
        """
        Create strings from a dataframe using python str.format() template.
        This tool is particularly useful for generating a list of commands
        that for piping into p.parallel.
        -----------------------------------------------------------------------
        Examples:

            * Create commands to touch a sequence of files in /tmp
                seq 10 | p.df --names n -i noheader\\
                | p.format -t 'touch /tmp/file{n:02d}.txt'
        -----------------------------------------------------------------------
        """
    )

    # assemble the command-line interface
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    arg_lib.add_args(parser, 'io_in')
    parser.add_argument(
        '-t', '--template', required=True, nargs=1,
        help='A python template string')

    cli_args = parser.parse_args()

    # load the input dataframe from stdin
    frame = io_lib.df_from_input(cli_args)

    # render the template once per input row
    writer = OutStream(cli_args.template[0])
    for row in frame.to_dict('records'):
        writer.write(**row)
    def test_add_args(self,
                      get_config_mock,
                      _check_for_recognized_args_mock,
                      _io_in_adder_mock,
                      _io_out_adder_mock,
                      _decorating_adder_mock,
                      _xy_adder_mock):
        """add_args() invokes every adder helper with the expected signature."""
        # configure the mocked collaborators
        mock_parser = MagicMock()
        get_config_mock.return_value = {}

        # the two call shapes the adders are expected to receive
        expected_plain = [call(mock_parser)]
        expected_with_config = [call(mock_parser, {})]

        # exercise the code under test
        arg_lib.add_args(mock_parser)

        # verify each collaborator was invoked exactly as expected
        self.assertEqual(get_config_mock.call_args_list, [call()])
        self.assertEqual(
            _check_for_recognized_args_mock.call_args_list, [call()])
        self.assertEqual(
            _io_in_adder_mock.call_args_list, expected_with_config)
        self.assertEqual(
            _io_out_adder_mock.call_args_list, expected_with_config)
        self.assertEqual(
            _decorating_adder_mock.call_args_list, expected_plain)
        self.assertEqual(_xy_adder_mock.call_args_list, expected_plain)
Example #3
0
    def test_draw_xy_plot(self):
        """draw_xy_plot() properly produces an output html file
        """
        out_file = os.path.join(self.dir_name, "test.html")
        command_line = (
            "p.plot -x x -y btrace ctrace -s o- --xlabel myxlabel "
            "--ylabel myylabel --title mytitle --theme darkgrid "
            "--context talk --palette muted -a .5 --nogrid "
            "--legend best --xlim 0 10 --ylim -10 10 "
            "--savefig {}".format(out_file)
        )
        with patch("pandashells.lib.plot_lib.sys.argv", command_line.split()):
            pl.clf()
            columns = {
                "x": list(range(10)),
                "btrace": [-v for v in range(10)],
                "ctrace": list(range(10)),
            }
            df = pd.DataFrame(columns)
            parser = argparse.ArgumentParser()
            arg_lib.add_args(parser, "io_in", "xy_plotting", "decorating", "example")

            parser.add_argument("-a", "--alpha", help="Set opacity", nargs=1, default=[1.0], type=float)
            args = parser.parse_args()
            plot_lib.draw_xy_plot(args, df)
            with open(out_file) as f:
                html = f.read()
            # every labeled element must have made it into the saved html
            for expected in ["myxlabel", "myylabel", "mytitle",
                             "btrace", "ctrace", "1", "10"]:
                self.assertIn(expected, html)
Example #4
0
def main():
    """Entry point for p.linspace: write a linearly spaced series to output.

    Reads three positional arguments (start, end, npoints), builds a
    single-column dataframe of evenly spaced values, and writes it out
    via io_lib in whatever output format the user requested.
    """
    # NOTE: a dead one-line assignment to msg (immediately overwritten by
    # the dedent below) was removed.
    msg = textwrap.dedent(
        """
        Generate a linearly spaced set of data points.

        -----------------------------------------------------------------------
        Examples:

            * Generate 7 points between 1 and 10
                p.linspace 1 10 7

        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_out')

    msg = 'start end npoints'
    parser.add_argument("numbers", help=msg, type=str, nargs=3, metavar='')

    # parse arguments
    args = parser.parse_args()
    min_val, max_val = float(args.numbers[0]), float(args.numbers[1])
    N = int(args.numbers[2])

    # one column named c0 holding the N evenly spaced values
    df = pd.DataFrame({'c0': np.linspace(min_val, max_val, N)})

    # write dataframe to output
    io_lib.df_to_output(args, df)
Example #5
0
def main():
    """Entry point for p.linspace: write a linearly spaced series to output.

    Parses start/end/npoints from the command line, builds a one-column
    dataframe with np.linspace, and writes it via io_lib.
    """
    # NOTE: a dead one-line assignment to msg (immediately overwritten by
    # the dedent below) was removed.
    msg = textwrap.dedent("""
        Generate a linearly spaced set of data points.

        -----------------------------------------------------------------------
        Examples:

            * Generate 7 points between 1 and 10
                p.linspace 1 10 7

        -----------------------------------------------------------------------
        """)

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_out', 'example')

    msg = 'start end npoints'
    parser.add_argument("numbers", help=msg, type=str, nargs=3, metavar='')

    # parse arguments
    args = parser.parse_args()
    min_val, max_val = float(args.numbers[0]), float(args.numbers[1])
    N = int(args.numbers[2])

    # one column named c0 holding the N evenly spaced values
    df = pd.DataFrame({'c0': np.linspace(min_val, max_val, N)})

    # write dataframe to output
    io_lib.df_to_output(args, df)
Example #6
0
def main():
    """Entry point for p.format: emit one templated string per input row."""
    help_text = textwrap.dedent(
        """
        Create strings from a dataframe using python str.format() template.
        This tool is particularly useful for generating a list of commands
        that for piping into p.parallel.
        -----------------------------------------------------------------------
        Examples:

            * Create commands to touch a sequence of files in /tmp
                seq 10 | p.df --names n -i noheader\\
                | p.format -t 'touch /tmp/file{n:02d}.txt'
        -----------------------------------------------------------------------
        """
    )

    # build the argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=help_text,
    )
    arg_lib.add_args(parser, 'io_in')
    parser.add_argument(
        '-t', '--template', required=True, nargs=1,
        help='A python template string')

    parsed = parser.parse_args()

    # pull the dataframe from stdin
    input_frame = io_lib.df_from_input(parsed)

    # format and emit each record through the output stream
    out_stream = OutStream(parsed.template[0])
    for record in input_frame.to_dict('records'):
        out_stream.write(**record)
Example #7
0
def main():
    """Entry point for p.lomb_scargle: compute a spectrogram of a time series.

    Reads a dataframe from stdin, runs the lomb-scargle algorithm over the
    requested time/observation columns, and writes the resulting spectrum
    dataframe to stdout.
    """
    msg = textwrap.dedent(
        """
        Computes a spectrogram using the lomb-scargle algorithm provided by
        the gatspy module.  The input time series need not have evenly spaced
        time-stamps.  The FFT-based algorithm has complexity O[N*log(N)].

        -----------------------------------------------------------------------
        Examples:

            * Plot the spectrum of a simple sine wave
                  p.linspace 0 10 100 \\
                  | p.df 'df["value"] = 7 * np.sin(2*np.pi*df.time / 1.5)'\\
                        --names time\\
                  | p.lomb_scargle -t time -y value --interp_exp 3\\
                  | p.plot -x period -y amp --xlim 0 3

            * Show the annual and 59-day peaks in the sealevel spectrum
                p.example_data -d sealevel\\
                | p.df 'df["day"] = 365.25 * df.year'\\
                        'df["day"] = df.day - df.day.iloc[0]'\\
                | p.lomb_scargle -t day -y sealevel_mm --interp_exp 3\\
                | p.df 'df[df.period < 720]'\\
                | p.plot -x period -y amp --xlim 1 400\\
                         --title 'Sea-surface height spectrum'\\
                         --xlabel 'period (days)'

        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out')

    parser.add_argument('-t', '--time_col', help='Time Column',
                        nargs=1, required=True, type=str)

    parser.add_argument('-y', '--observation_col', help='Observation column',
                        nargs=1, dest='val_col', required=True, type=str)

    parser.add_argument('--interp_exp', help='Interpolate by this power of 2',
                        nargs=1, type=int, default=[1])
    # fixed typo in help text: "freqency" -> "frequency"
    parser.add_argument(
        '--freq_order', action='store_true', dest='freq_order', default=False,
        help='Order output by frequency instead of period')

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe and run the spectral analysis
    df = io_lib.df_from_input(args)
    df = lomb_scargle_lib.lomb_scargle(
        df, args.time_col[0], args.val_col[0], args.interp_exp[0],
        args.freq_order)

    # write dataframe to output
    io_lib.df_to_output(args, df)
Example #8
0
def get_input_args():
    """Build the p.hist argument parser and return the parsed arguments.

    Defines all histogram options (columns, bins, range, layout, opacity,
    density, shared axes) on top of the standard io/decorating args.
    Typos in the user-facing help text have been corrected
    ("distriubtion", "sid-by-side", "Quiet mean").
    """
    msg = textwrap.dedent(
        """
        Plot histograms from input data.  Can either plot just a single
        histogram or a grid of histograms with different columns of data.
        When multiple columns are specified, creates a grid of histograms,
        one for each specified column.

        -----------------------------------------------------------------------
        Examples:

            * Plot histogram of a beta distribution
                p.rand -t beta --alpha 3 --beta 10 -n 10000\\
                | p.hist --names beta -n 50

            * Plot a side-by-side comparison of a gamma and normal distribution
              paste <(p.rand -t normal  -n 10000 | p.df --names normal)\\
                    <(p.rand -t gamma   -n 10000 | p.df --names gamma)\\
              | p.hist -i table -c normal gamma
        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'example', 'decorating')

    # specify columns to histogram
    parser.add_argument(
        '-c', '--cols', help='Column(s) to histogram', nargs='+')
    parser.add_argument(
        '-q', '--quiet', action='store_true', default=False,
        help='Quiet means no plots. Send numeric output to stdout instead')
    parser.add_argument(
        '-n', '--nbins', help='Number of bins (default=30)', nargs=1,
        default=[30], type=int)
    parser.add_argument(
        '-r', '--range', help='Range (min max) of x axis', nargs=2,
        default=None, type=float)
    parser.add_argument(
        '-l', '--layout', help='Layout (rows, cols)',
        nargs=2, default=None, type=int)
    parser.add_argument(
        '-a', '--alpha', help='Set opacity of hist bars', nargs=1,
        default=[1.], type=float)
    parser.add_argument(
        '-d', '--density', action='store_true', default=False,
        help='Show probability density instead of counts')
    parser.add_argument(
        '--sharex', action='store_true', default=False,
        help='Make all x axes have the same range')
    parser.add_argument(
        '--sharey', action='store_true', default=False,
        help='Make all y axes have the same range')
    return parser.parse_args()
Example #9
0
def main():
    """Entry point for p.sig_edit: recursively NaN out outlier values."""
    description = textwrap.dedent(
        """
        Remove outliers from DataFrame columns using a recursive sigma-edit
        algorithm.  The algorithm will recursively NaN out values greater than
        sigma_thresh standard deviations away from sample mean.

        -----------------------------------------------------------------------
        Examples:

            * Do a 2.5-sigma edit on a gamma distribution and show histogram
                p.rand -n 1000 -t gamma --alpha=3 --beta=.01\\
                | p.df 'df["c1"] = df.c0'\\
                | p.sig_edit -c c1 -t 2.5\\
                | p.df 'pd.melt(df)' --names raw edited\\
                | p.facet_grid --hue variable --map pl.hist\\
                   --args value --kwargs 'alpha=.2' 'range=[0, 1000]' 'bins=50'
        -----------------------------------------------------------------------
        """
    )

    # assemble the command-line interface
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')
    parser.add_argument(
        "-t", "--sigma_thresh", help="Sigma threshold", nargs=1,
        required=True, type=float)
    parser.add_argument(
        "-c", "--cols", required=True, help="Column(s) to sigma-edit",
        nargs="+")
    parser.add_argument(
        "--max_iter", help="Max number of recursions", nargs=1, type=int,
        default=[20])

    cli_args = parser.parse_args()

    # read the dataframe, sigma-edit the requested columns, write it back out
    frame = io_lib.df_from_input(cli_args)
    frame = outlier_lib.sigma_edit_dataframe(
        cli_args.sigma_thresh[0], cli_args.cols, frame,
        max_iter=cli_args.max_iter[0])
    io_lib.df_to_output(cli_args, frame)
Example #10
0
def main():
    """Entry point for p.plot: draw an interactive xy plot from stdin data."""
    description = textwrap.dedent(
        """
        Creates interactive xy plots.  Loosely based around matplotlib's
        pyplot.plot command.

        -----------------------------------------------------------------------
        Examples:

            * Really simple plot
                p.linspace 1 10 7 | p.plot -x c0 -y c0

            * Plot two traces
                p.linspace 0 6.28 100\\
                | p.df 'df["cos"]=np.cos(df.t)' 'df["sin"]=np.sin(df.t)'\\
                        --names t\\
                | p.plot -x t -y sin cos\\
                         --style '.-' 'o-' --alpha 1 .2 --legend best

            * Plot sea-level time series
                p.example_data -d sealevel\\
                | p.plot -x year -y sealevel_mm --style '.'\\
                --xlabel year --ylabel 'relative sea level (mm)'\\
                --title 'Sea Level Rise' --legend best --xlim 1995 2015
        -----------------------------------------------------------------------
        """
    )

    # assemble the command-line interface
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    arg_lib.add_args(parser, 'io_in', 'xy_plotting', 'decorating')
    parser.add_argument(
        "-a", "--alpha", help="Set opacity level(s)", nargs='+',
        default=[1.], type=float, metavar='alpha')

    cli_args = parser.parse_args()

    # load the data, apply the requested styling, and render the plot
    frame = io_lib.df_from_input(cli_args)
    plot_lib.set_plot_styling(cli_args)
    plot_lib.draw_xy_plot(cli_args, frame)
Example #11
0
def main():
    """Entry point for p.plot: render an interactive xy plot of stdin data."""
    msg = textwrap.dedent("""
        Creates interactive xy plots.  Loosely based around matplotlib's
        pyplot.plot command.

        -----------------------------------------------------------------------
        Examples:

            * Really simple plot
                p.linspace 1 10 7 | p.plot -x c0 -y c0

            * Plot two traces
                p.linspace 0 6.28 100\\
                | p.df 'df["cos"]=np.cos(df.t)' 'df["sin"]=np.sin(df.t)'\\
                        --names t\\
                | p.plot -x t -y sin cos\\
                         --style '.-' 'o-' --alpha 1 .2 --legend best

            * Plot sea-level time series
                p.example_data -d sealevel\\
                | p.plot -x year -y sealevel_mm --style '.'\\
                --xlabel year --ylabel 'relative sea level (mm)'\\
                --title 'Sea Level Rise' --legend best --xlim 1995 2015
        -----------------------------------------------------------------------
        """)

    # build the command-line parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'xy_plotting', 'decorating')

    parser.add_argument("-a",
                        "--alpha",
                        help="Set opacity level(s)",
                        nargs='+',
                        default=[1.],
                        type=float,
                        metavar='alpha')

    # parse the command line
    args = parser.parse_args()

    # read input, style, and draw
    df = io_lib.df_from_input(args)
    plot_lib.set_plot_styling(args)
    plot_lib.draw_xy_plot(args, df)
Example #12
0
def get_input_args():
    """Build the p.smooth argument parser and return the parsed arguments."""
    msg = textwrap.dedent("""
        Smooths data in specified column.  Uses algorithm[1] from the
        supersmoother python package for smoothing with cross validation
        to determine best span.

        [1] Friedman, J. H. (1984) A variable span scatterplot smoother.
            Laboratory for Computational Statistics, Stanford University
            Technical Report No. 5.
            pdf: http://www.slac.stanford.edu/cgi-wrap/getdoc/slac-pub-3477.pdf


        -----------------------------------------------------------------------
        Examples:

           * Smooth sea level time series
                 p.example_data -d sealevel \\
                 | p.df 'df["smoothed"] = df.sealevel_mm' \\
                 | p.smooth -x year -y smoothed \\
                 | p.plot -x year -y sealevel_mm smoothed \\
                   --legend best -s .  '-' --alpha .5 1

           * Now pretend year doesn't exist and treat as equally spaced
                 p.example_data -d sealevel \\
                 | p.df 'df["smoothed"] = df.sealevel_mm' \\
                 | p.smooth -y smoothed \\
                 | p.plot -x year -y sealevel_mm smoothed \\
                   --legend best -s .  '-' --alpha .5 1
        -----------------------------------------------------------------------
        """)

    # build the command-line parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)
    arg_lib.add_args(parser, 'io_in', 'io_out')

    # columns to operate on
    parser.add_argument(
        '-y', dest='y', help='Column(s) to smooth', nargs='+', required=True)
    parser.add_argument(
        '-x', dest='x', help='Optional: if y=f(x), specify x', nargs=1)
    return parser.parse_args()
Example #13
0
def get_input_args():
    """Parse and return the command-line arguments for p.smooth."""
    description = textwrap.dedent(
        """
        Smooths data in specified column.  Uses algorithm[1] from the
        supersmoother python package for smoothing with cross validation
        to determine best span.

        [1] Friedman, J. H. (1984) A variable span scatterplot smoother.
            Laboratory for Computational Statistics, Stanford University
            Technical Report No. 5.
            pdf: http://www.slac.stanford.edu/cgi-wrap/getdoc/slac-pub-3477.pdf


        -----------------------------------------------------------------------
        Examples:

           * Smooth sea level time series
                 p.example_data -d sealevel \\
                 | p.df 'df["smoothed"] = df.sealevel_mm' \\
                 | p.smooth -x year -y smoothed \\
                 | p.plot -x year -y sealevel_mm smoothed \\
                   --legend best -s .  '-' --alpha .5 1

           * Now pretend year doesn't exist and treat as equally spaced
                 p.example_data -d sealevel \\
                 | p.df 'df["smoothed"] = df.sealevel_mm' \\
                 | p.smooth -y smoothed \\
                 | p.plot -x year -y sealevel_mm smoothed \\
                   --legend best -s .  '-' --alpha .5 1
        -----------------------------------------------------------------------
        """
    )

    # assemble the parser with the standard io options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    arg_lib.add_args(parser, 'io_in', 'io_out')

    # columns to smooth (and the optional abscissa column)
    parser.add_argument('-y',
                        dest='y',
                        help='Column(s) to smooth',
                        nargs='+',
                        required=True)
    parser.add_argument('-x',
                        dest='x',
                        help='Optional: if y=f(x), specify x',
                        nargs=1)
    return parser.parse_args()
Example #14
0
def main():
    """Entry point for p.sig_edit: sigma-edit outliers out of a dataframe."""
    msg = textwrap.dedent("""
        Remove outliers from DataFrame columns using a recursive sigma-edit
        algorithm.  The algorithm will recursively NaN out values greater than
        sigma_thresh standard deviations away from sample mean.

        -----------------------------------------------------------------------
        Examples:

            * Do a 2.5-sigma edit on a gamma distribution and show histogram
                p.rand -n 1000 -t gamma --alpha=3 --beta=.01\\
                | p.df 'df["c1"] = df.c0'\\
                | p.sig_edit -c c1 -t 2.5\\
                | p.df 'pd.melt(df)' --names raw edited\\
                | p.facet_grid --hue variable --map pl.hist\\
                   --args value --kwargs 'alpha=.2' 'range=[0, 1000]' 'bins=50'
        -----------------------------------------------------------------------
        """)

    # build the command-line parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    parser.add_argument("-t",
                        "--sigma_thresh",
                        help="Sigma threshold",
                        nargs=1,
                        required=True,
                        type=float)
    parser.add_argument("-c",
                        "--cols",
                        required=True,
                        help="Column(s) to sigma-edit",
                        nargs="+")
    parser.add_argument("--max_iter",
                        help="Max number of recursions",
                        nargs=1,
                        type=int,
                        default=[20])

    # parse the command line
    args = parser.parse_args()

    # read input, sigma-edit the requested columns, and write the result
    frame = io_lib.df_from_input(args)
    frame = outlier_lib.sigma_edit_dataframe(args.sigma_thresh[0],
                                             args.cols,
                                             frame,
                                             max_iter=args.max_iter[0])
    io_lib.df_to_output(args, frame)
Example #15
0
    def test_draw_xy_plot(self):
        """draw_xy_plot() properly produces an output html file
        """
        out_file = os.path.join(self.dir_name, 'test.html')
        command_line = (
            'p.plot -x x -y btrace ctrace -s o- --xlabel myxlabel '
            '--ylabel myylabel --title mytitle --theme darkgrid '
            '--context talk --palette muted -a .5 --nogrid '
            '--legend best --xlim 0 10 --ylim -10 10 '
            '--savefig {}'.format(out_file)
        )
        with patch('pandashells.lib.plot_lib.sys.argv', command_line.split()):
            pl.clf()
            frame_data = {
                'x': list(range(10)),
                'btrace': [-v for v in range(10)],
                'ctrace': list(range(10)),
            }
            df = pd.DataFrame(frame_data)
            parser = argparse.ArgumentParser()
            arg_lib.add_args(
                parser, 'io_in', 'xy_plotting', 'decorating', 'example')

            parser.add_argument(
                "-a", "--alpha", help="Set opacity", nargs=1, default=[1.],
                type=float)
            args = parser.parse_args()
            plot_lib.draw_xy_plot(args, df)
            with open(out_file) as f:
                html = f.read()
            # each labeled plot element must appear in the saved html
            for expected in ['myxlabel', 'myylabel', 'mytitle',
                             'btrace', 'ctrace', '1', '10']:
                self.assertIn(expected, html)
Example #16
0
    def test_draw_xy_plot(self):
        """draw_xy_plot() properly produces an output html file
        """
        out_file = os.path.join(self.dir_name, 'test.html')
        command_line = (
            'p.plot -x x -y btrace ctrace -s o- --xlabel myxlabel '
            '--ylabel myylabel --title mytitle --theme darkgrid '
            '--context talk --palette muted -a .5 --nogrid '
            '--legend best --xlim 0 10 --ylim -10 10 '
            '--savefig {}'.format(out_file)
        )
        with patch('pandashells.lib.plot_lib.sys.argv', command_line.split()):
            pl.clf()
            frame_data = {
                'x': list(range(10)),
                'btrace': [-v for v in range(10)],
                'ctrace': list(range(10)),
            }
            df = pd.DataFrame(frame_data)
            parser = argparse.ArgumentParser()
            arg_lib.add_args(
                parser, 'io_in', 'xy_plotting', 'decorating')

            parser.add_argument(
                "-a", "--alpha", help="Set opacity", nargs=1, default=[1.],
                type=float)
            args = parser.parse_args()
            plot_lib.draw_xy_plot(args, df)
            with open(out_file) as f:
                html = f.read()
            # each labeled plot element must appear in the saved html
            for expected in ['myxlabel', 'myylabel', 'mytitle',
                             'btrace', 'ctrace', '1', '10']:
                self.assertIn(expected, html)
Example #17
0
def main():  # pragma: no cover
    """Entry point for p.df: apply command-line pandas statements to stdin.

    Reads a dataframe from stdin, executes each positional statement in
    order against it (via process_command), and writes the final result to
    stdout.  Typos in the user-facing description were corrected
    ("agument", "accomodate", "doesn not").
    """
    # read command line arguments
    msg = textwrap.dedent(
        """
        Enables pandas dataframe processing at the unix command line.

        This is the real workhorse of the pandashells toolkit.  It reads data
        from stdin as a dataframe, which is passed through any number of pandas
        operations provided on the command line.  Output is always to stdout.

        Each operation assumes data is in a dataframe named df.  Operations
        performed on this dataframe will overwrite the df variable with
        the results of that operation.  Special consideration is taken for
        assignments such as df['a'] = df.b + df.c.  These are understood
        to augment the input dataframe with a new column. By way of example,
        this command:
            p.df 'df.groupby(by="a").b.count()' 'df.reset_index()'
        is equivalent to the python expressions:
            df = df.groupby(by="a").b.count()
            df = df.reset_index()

        In addition to providing access to pandas dataframes, a number of
        modules are loaded into the namespace so as to be accessible from the
        command line.  These modules are:
            pd = pandas
            np = numpy
            scp = scipy
            pl = pylab
            parse = dateutil.parser.parse
            datetime = datetime
            re = re

        ** Important **
        When creating chains of dataframe operations (see examples), it is
        important to express your chain of operations before any options. This
        is because some options can take multiple arguments and the parser
        won't be able to properly decode your meaning.
        For example:
            cat file.csv | p.df 'df["x"] = df.y + 1' -o table noheader  # GOOD
            cat file.csv | p.df -o table noheader 'df["x"] = df.y + 1'  # BAD

        Input can be read in different formats as specified by the -i switch.
        The most common formats are csv and table (white-space-delimited).  In
        either of these formats, p.df can accommodate input data that either
        does or does not have a header row.  When no header row is indicated,
        The columns of the Dataframe will be labeled as c0, c1, ..., cN.

        Plotting methods invoked on a Dataframe generate no output, but
        create an interactive plot instead.  There are a number of plot
        specific options available at the command line that govern the details
        of how these plots are rendered (e.g. --xlim, --legend, etc).

        -----------------------------------------------------------------------
        Examples:

            * Print a csv file in nice tabular format
                p.example_data -d tips | p.df -o table | head

            * Print a csv file to json
                p.example_data -d tips | head | p.df -o json

            * Transform csv to json then to table
                p.example_data -d tips | head | p.df -o json \\
                | p.df -i json -o table

            * Select by row
                p.example_data -d tips \\
                | p.df 'df[df.sex=="Female"]' 'df[df.smoker=="Yes"]' -o table

            * Extract columns
                p.example_data -d tips \\
                | p.df 'df[["total_bill", "tip"]].head()' -o table

            * Perform grouped aggregations
                p.example_data -d tips | p.df \\
                'df.groupby(by=["sex", "smoker"]).tip.sum()' -o table index

            * Use pandas plotting methods
                p.example_data -d tips | p.df \\
                'df.groupby(by="day").total_bill.sum().plot(kind="barh")'\\
                --xlabel 'Dollars' --title 'Total Bills by Day'

            * Convert between tabular and csv format with/without header rows
                seq 10 | awk '{print $1, 2*$1}'\\
                | p.df --names a b -i table noheader | p.df -o table noheader

        -----------------------------------------------------------------------
        """
    )
    from pandashells.lib import arg_lib

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)
    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')
    msg = (
        '(MUST come before any options) '
        '[statement ...] Statement(s) to execute. '
    )
    parser.add_argument(
        "statement", help=msg, nargs="*")
    args = parser.parse_args()

    # import any modules/shortcuts implied by the statements before io_lib
    get_modules_and_shortcuts(args.statement)
    from pandashells.lib import io_lib

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # execute the statements in order
    # plot commands are terminal statements so will call sys.exit()
    for cmd in args.statement:
        df = process_command(args, cmd, df)

    # write the output
    io_lib.df_to_output(args, df)
Example #18
0
def main():
    """Entry point for p.cdf: plot (or print) an empirical CDF of a column."""
    # Help text shown with --help (typo fix: 'emperical' -> 'empirical')
    msg = textwrap.dedent(
        """
        Plots the empirical cumulative distribution function (ECDF).

        -----------------------------------------------------------------------
        Examples:

            * Plot ECDF for 10k samples from the standard normal distribution.
                p.rand -t normal -n 10000 | p.cdf -c c0

            * Instead of plotting, send ECDF values to stdout
                p.rand -t normal -n 10000 | p.cdf -c c0 -q | head
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    # specify column to use
    parser.add_argument(
        "-c", "--col", required=True, nargs=1,
        help="Column to plot distribution")
    parser.add_argument(
        '-n', '--n_points', nargs=1, type=int,
        help='Number of output points (default is twice input len)')
    # typo fix in help: 'Quiet mean' -> 'Quiet means'
    parser.add_argument(
        '-q', '--quiet', action='store_true', default=False,
        help='Quiet means no plots. Send numeric output to stdout instead')

    # add the standard pandashells io/decorating options, then parse
    arg_lib.add_args(parser, 'decorating', 'io_in', 'io_out',)
    args = parser.parse_args()

    # get the input dataframe and extract the column to analyze
    df = io_lib.df_from_input(args)
    x = df[args.col[0]].values

    # create the output distribution (default resolution: twice the input len)
    n_out = 2 * len(x) if args.n_points is None else args.n_points[0]
    x_out = np.linspace(min(x), max(x), n_out)
    y_out = ECDF(x)(x_out)

    # send values to stdout if quiet specified
    if args.quiet:
        df_out = pd.DataFrame(
            {'x': x_out, 'p_less': y_out, 'p_greater': 1 - y_out})
        df_out = df_out[['x', 'p_less', 'p_greater']]
        io_lib.df_to_output(args, df_out)
        return

    # set the appropriate theme and make plot
    plot_lib.set_plot_styling(args)
    pl.plot(x_out, y_out, label='P({} < x)'.format(args.col[0]))
    pl.plot(x_out, 1. - y_out, label='P({} > x)'.format(args.col[0]))
    pl.xlabel('x')
    pl.legend(loc='best')

    plot_lib.refine_plot(args)
    plot_lib.show(args)
Example #19
0
def main():
    """Command line entry point for the p.regress linear-regression tool."""
    msg = textwrap.dedent(
        """
        Performs (multivariable) linear regression.  The fitting model
        is specified using the R-like, patsy syntax.  Input is from stdin
        and output is either fitting information or the input data
        with columns added for the fit and residuals.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to the sea-level data
                p.example_data -d sealevel | p.regress -m 'sealevel_mm ~ year'

            * Fit a trend plus annual cycle to sealevel data
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin'

            * Examine residual ECDF of trend plus annual fit
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin' --fit \\
                | p.cdf -c 'resid_' --title 'ECDF of trend + annual'

            * Detrend sealevel data to more clearly reveal oscillations
                p.example_data -d sealevel \\
                | p.regress -m 'sealevel_mm ~ year' --fit \\
                | p.plot -x year -y resid_ --ylabel 'Trend removed (mm)' \\
                         --title 'Global Sea Surface Height'

            * Set origin of sealevel data to 0 and regress with no intercept
                p.example_data -d sealevel\\
                | p.df 'df["year"] = df.year - df.year.iloc[0]'\\
                'df["sealevel_mm"] = df.sealevel_mm - df.sealevel_mm.iloc[0]'\\
                | p.regress -m 'sealevel_mm ~ year - 1' --fit\\
                | p.plot -x year -y sealevel_mm fit_ --style '.' '-'\\
                     --alpha .2 1 --legend best --title 'Force Zero Intercept'

        -----------------------------------------------------------------------
        """
    )

    # build the argument parser using the help text above as description
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out')

    # the patsy model specification is the only required option
    parser.add_argument(
        "-m", "--model", type=str, nargs=1, required=True,
        help="The model expressed in patsy syntax")

    msg = "Return input with fit and residual appended"
    parser.add_argument(
        "--fit", action="store_true", dest='retfit', default=False, help=msg)

    parser.add_argument(
        "--plot", action="store_true", default=False,
        help="Make residual plots")

    # parse arguments
    args = parser.parse_args()

    # load the input frame and fit the requested model
    df = io_lib.df_from_input(args)
    ols_result = sm.ols(formula=args.model[0], data=df).fit()
    df['fit_'] = ols_result.fittedvalues
    df['resid_'] = ols_result.resid

    # --fit: emit the augmented frame instead of the fit summary
    if args.retfit:
        io_lib.df_to_output(args, df)
        return

    # otherwise print the statsmodels fit summary to stdout
    sys.stdout.write('\n{}\n'.format(ols_result.summary()))
    sys.stdout.flush()

    # optional residual diagnostic plots (plotting deps loaded lazily)
    if args.plot:
        module_checker_lib.check_for_modules(['matplotlib', 'seaborn'])
        plot_lib = get_module('pandashells.lib.plot_lib')
        mpl = get_module('matplotlib')
        pl = get_module('pylab')
        sns = get_module('seaborn')

        # top panel: residual vs fit scatter
        pl.subplot(211)
        pl.plot(df.fit_, df.resid_, '.', alpha=.5)
        pl.xlabel('Fit')
        pl.ylabel('Residual')
        pl.title(args.model[0])

        # bottom panel: residual histogram annotated with R^2
        pl.subplot(212)
        sns.distplot(df.resid_, bins=50)
        pl.xlabel('Residual with R^2 = {:0.4f}'.format(ols_result.rsquared))
        pl.ylabel('Counts')

        # annoying issue with osx backend forces if statement here
        if mpl.get_backend().lower() in ['agg', 'macosx']:
            pl.gcf().set_tight_layout(True)
        else:
            pl.gcf().tight_layout()

        plot_lib.show(args)
Example #20
0
def main():
    """Entry point for p.rand: sample a distribution and write a dataframe."""
    # Help text (typo fix: 'distrubtions' -> 'distributions')
    msg = textwrap.dedent(
        """
        Return random samples from common probability distributions.

        -----------------------------------------------------------------------
        Examples:

            uniform:  p.rand -n 1000 -t uniform  --min=0    --max=1   | p.hist
            normal:   p.rand -n 1000 -t normal   --mu=0     --sigma=1 | p.hist
            poisson:  p.rand -n 1000 -t poisson  --mu=1               | p.hist
            beta:     p.rand -n 1000 -t beta     --alpha=2  --beta=6  | p.hist
            gamma:    p.rand -n 1000 -t gamma    --alpha=1  --beta=1  | p.hist
            binomial: p.rand -n 1000 -t binomial --N=10     --p=0.4   | p.hist
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    parser.add_argument(
        '-t', '--type', nargs=1, type=str, default=['uniform'],
        choices=['uniform', 'normal', 'beta', 'gamma', 'binomial', 'poisson'],
        help='type of distribution (default=\'uniform\')')
    parser.add_argument(
        '-n', '--num_samples', nargs=1, default=[10], type=int,
        help='The number of rows to generate (default=10)')
    parser.add_argument(
        '-c', '--columns', nargs=1, default=[1], type=int,
        help='The number of columns to generate per row (default=1)')
    parser.add_argument(
        '--N', nargs=1, default=[10], type=int,
        help=(
            '(Binomial Dist) Largest possible value for random variable. '
            '(default=10)'
        )
    )
    # help string fix: add the missing space before '(default=.5)'
    parser.add_argument(
        '--p', nargs=1, default=[.5], type=float,
        help=(
            '(Binomial Dist) Bernoulli probability for each trial '
            '(default=.5)'
        )
    )
    # help string fix: add the missing closing paren
    parser.add_argument(
        '--mu', nargs=1, type=float,
        help='(Normal, Poisson) Mean (defaults: normal:0, poisson:1)')
    parser.add_argument(
        '--sigma', nargs=1, default=[1.], type=float,
        help='(Normal) standard deviation, (default: 1)')
    parser.add_argument(
        '--min', nargs=1, default=[0.], type=float,
        help='(Uniform) Minimum value of range, (default: 0)')
    parser.add_argument(
        '--max', nargs=1, default=[1.], type=float,
        help='(Uniform) Maximum value of range, (default: 1)')
    parser.add_argument(
        '--alpha', nargs=1, default=[2.], type=float,
        help='(Beta, Gamma)  (default: 2)')
    parser.add_argument(
        '--beta', nargs=1, default=[2.], type=float,
        help='(Beta, Gamma)  (default: 2)')

    arg_lib.add_args(parser, 'io_out', 'example')

    # parse arguments
    args = parser.parse_args()

    # the --mu default depends on distribution type, so fill it after parsing
    args = fill_default_mu(args)

    # draw the samples for the requested distribution
    df = get_samples(args)

    # write dataframe to output
    io_lib.df_to_output(args, df)
Example #21
0
def main():
    """Entry point for p.merge: database-style join of two tabular files."""
    msg = textwrap.dedent(
        """
        Tool to merge datasets.  Similar functionality to database
        joins. The arguments closely parallel those of the pandas merge
        command.  See the pandas merge documentation for more details.

        -----------------------------------------------------------------------
        Examples:

            * Merge election polls with electoral-college numbers
                p.merge <(p.example_data -d election) \\
                        <(p.example_data -d electoral_college) \\
                        --how left --on state \\
                | p.df -o table | head
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    parser.add_argument('--how', choices=['left', 'right', 'inner', 'outer'],
                        dest='how', default=['inner'], nargs=1,
                        help="Type of join.  Default='inner'")

    # fixed doubled word ('of of') in the three join-column help strings
    msg = 'List of columns on which to join'
    parser.add_argument('--on', nargs='+', metavar='col',
                        type=str, dest='on', help=msg)

    msg = 'List of columns from left file to join on. '
    parser.add_argument('--left_on', nargs='+', metavar='col',
                        type=str, dest='left_on', help=msg)

    msg = 'List of columns from right file to join on. '
    parser.add_argument('--right_on', nargs='+', metavar='col',
                        type=str, dest='right_on', help=msg)

    msg = 'List of suffixes appended to identically '
    msg += 'named columns'
    parser.add_argument('--suffixes', nargs=2, metavar='_x _y',
                        type=str, dest='suffixes', default=['_x', '_y'],
                        help=msg)

    parser.add_argument("file", help="Files to join", nargs=2, type=str,
                        metavar='file')

    args = parser.parse_args()
    validate_args(args)

    # get merge options from cli (nargs=1 options arrive wrapped in lists)
    how = args.how[0]
    on = args.on if args.on else None
    left_on = args.left_on if args.left_on else None
    right_on = args.right_on if args.right_on else None
    suffixes = args.suffixes

    # get file names
    left_name, right_name = tuple(args.file)

    # load the dataframes
    df_left = io_lib.df_from_input(args, left_name)
    df_right = io_lib.df_from_input(args, right_name)

    # perform the merge (sort=True gives deterministic output row order)
    dfj = pd.merge(df_left, df_right, how=how, on=on, left_on=left_on,
                   right_on=right_on, sort=True, suffixes=suffixes)

    # output the joined frame
    io_lib.df_to_output(args, dfj)
Example #22
0
def main():
    """Entry point for p.merge: database-style join of two tabular files."""
    msg = textwrap.dedent("""
        Tool to merge datasets.  Similar functionality to database
        joins. The arguments closely parallel those of the pandas merge
        command.  See the pandas merge documentation for more details.

        -----------------------------------------------------------------------
        Examples:

            * Merge election polls with electoral-college numbers
                p.merge <(p.example_data -d election) \\
                        <(p.example_data -d electoral_college) \\
                        --how left --on state \\
                | p.df -o table | head
        -----------------------------------------------------------------------
        """)

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    parser.add_argument('--how',
                        choices=['left', 'right', 'inner', 'outer'],
                        dest='how',
                        default=['inner'],
                        nargs=1,
                        help="Type of join.  Default='inner'")

    # fixed doubled word ('of of') in the three join-column help strings
    msg = 'List of columns on which to join'
    parser.add_argument('--on',
                        nargs='+',
                        metavar='col',
                        type=str,
                        dest='on',
                        help=msg)

    msg = 'List of columns from left file to join on. '
    parser.add_argument('--left_on',
                        nargs='+',
                        metavar='col',
                        type=str,
                        dest='left_on',
                        help=msg)

    msg = 'List of columns from right file to join on. '
    parser.add_argument('--right_on',
                        nargs='+',
                        metavar='col',
                        type=str,
                        dest='right_on',
                        help=msg)

    msg = 'List of suffixes appended to identically '
    msg += 'named columns'
    parser.add_argument('--suffixes',
                        nargs=2,
                        metavar='_x _y',
                        type=str,
                        dest='suffixes',
                        default=['_x', '_y'],
                        help=msg)

    parser.add_argument("file",
                        help="Files to join",
                        nargs=2,
                        type=str,
                        metavar='file')

    args = parser.parse_args()
    validate_args(args)

    # get merge options from cli (nargs=1 options arrive wrapped in lists)
    how = args.how[0]
    on = args.on if args.on else None
    left_on = args.left_on if args.left_on else None
    right_on = args.right_on if args.right_on else None
    suffixes = args.suffixes

    # get file names
    left_name, right_name = tuple(args.file)

    # load the dataframes
    df_left = io_lib.df_from_input(args, left_name)
    df_right = io_lib.df_from_input(args, right_name)

    # perform the merge (sort=True gives deterministic output row order)
    dfj = pd.merge(df_left,
                   df_right,
                   how=how,
                   on=on,
                   left_on=left_on,
                   right_on=right_on,
                   sort=True,
                   suffixes=suffixes)

    # output the joined frame
    io_lib.df_to_output(args, dfj)
Example #23
0
def main():
    """Command line entry point for the p.regress linear-regression tool."""
    msg = textwrap.dedent("""
        Performs (multivariable) linear regression.  The fitting model
        is specified using the R-like, patsy syntax.  Input is from stdin
        and output is either fitting information or the input data
        with columns added for the fit and residuals.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to the sea-level data
                p.example_data -d sealevel | p.regress -m 'sealevel_mm ~ year'

            * Fit a trend plus annual cycle to sealevel data
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin'

            * Examine residual ECDF of trend plus annual fit
                p.example_data -d sealevel \\
                | p.df 'df["sin"] =  np.sin(2 * np.pi * df.year)' \\
                | p.df 'df["cos"] = np.cos(2 * np.pi * df.year)' \\
                | p.regress -m 'sealevel_mm ~ year + cos + sin' --fit \\
                | p.cdf -c 'resid_' --title 'ECDF of trend + annual'

            * Detrend sealevel data to more clearly reveal oscillations
                p.example_data -d sealevel \\
                | p.regress -m 'sealevel_mm ~ year' --fit \\
                | p.plot -x year -y resid_ --ylabel 'Trend removed (mm)' \\
                         --title 'Global Sea Surface Height'

            * Set origin of sealevel data to 0 and regress with no intercept
                p.example_data -d sealevel\\
                | p.df 'df["year"] = df.year - df.year.iloc[0]'\\
                'df["sealevel_mm"] = df.sealevel_mm - df.sealevel_mm.iloc[0]'\\
                | p.regress -m 'sealevel_mm ~ year - 1' --fit\\
                | p.plot -x year -y sealevel_mm fit_ --style '.' '-'\\
                     --alpha .2 1 --legend best --title 'Force Zero Intercept'

        -----------------------------------------------------------------------
        """)

    # build the argument parser using the help text above as description
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')

    # the patsy model specification is the only required option
    parser.add_argument(
        "-m", "--model", type=str, nargs=1, required=True,
        help="The model expressed in patsy syntax")

    msg = "Return input with fit and residual appended"
    parser.add_argument(
        "--fit", action="store_true", dest='retfit', default=False, help=msg)

    parser.add_argument(
        "--plot", action="store_true", default=False,
        help="Make residual plots")

    # parse arguments
    args = parser.parse_args()

    # load the input frame and fit the requested model
    df = io_lib.df_from_input(args)
    ols_result = sm.ols(formula=args.model[0], data=df).fit()
    df['fit_'] = ols_result.fittedvalues
    df['resid_'] = ols_result.resid

    # --fit: emit the augmented frame instead of the fit summary
    if args.retfit:
        io_lib.df_to_output(args, df)
        return

    # otherwise print the statsmodels fit summary to stdout
    sys.stdout.write('\n{}\n'.format(ols_result.summary()))
    sys.stdout.flush()

    # optional residual diagnostic plots
    if args.plot:
        # top panel: residual vs fit scatter
        pl.subplot(211)
        pl.plot(df.fit_, df.resid_, '.', alpha=.5)
        pl.xlabel('Fit')
        pl.ylabel('Residual')
        pl.title(args.model[0])

        # bottom panel: residual histogram annotated with R^2
        pl.subplot(212)
        sns.distplot(df.resid_, bins=50)
        pl.xlabel('Residual with R^2 = {:0.4f}'.format(ols_result.rsquared))
        pl.ylabel('Counts')

        # annoying issue with osx backend forces if statement here
        if mpl.get_backend().lower() in ['agg', 'macosx']:
            pl.gcf().set_tight_layout(True)
        else:
            pl.gcf().tight_layout()

        plot_lib.show(args)
Example #24
0
def main():  # pragma: no cover
    """Entry point for p.df: pipe a dataframe through pandas expressions."""
    # read command line arguments
    msg = textwrap.dedent(
        """
        Enables pandas dataframe processing at the unix command line.

        This is the real workhorse of the pandashells toolkit.  It reads data
        from stdin as a dataframe, which is passed through any number of pandas
        operations provided on the command line.  Output is always to stdout.

        Each operation assumes data is in a dataframe named df.  Operations
        performed on this dataframe will overwrite the df variable with
        the results of that operation.  Special consideration is taken for
        assignments such as df['a'] = df.b + df.c.  These are understood
        to agument the input dataframe with a new column. By way of example,
        this command:
            p.df 'df.groupby(by="a").b.count()' 'df.reset_index()'
        is equivalent to the python expressions:
            df = df.groupby(by="a").b.count()
            df = df.reset_index()

        In addition to providing access to pandas dataframes, a number of
        modules are loaded into the namespace so as to be accessible from the
        command line.  These modules are:
            pd = pandas
            np = numpy
            scp = scipy
            pl = pylab
            parse = dateutil.parser.parse
            datetime = datetime
            re = re

        ** Important **
        When creating chains of dataframe operations (see examples), it is
        important to express your chain of operations before any options. This
        is because some options can take multiple arguments and the parser
        won't be able to properly decode your meaning.
        For example:
            cat file.csv | p.df 'df["x"] = df.y + 1' -o table noheader  # GOOD
            cat file.csv | p.df -o table noheader 'df["x"] = df.y + 1'  # BAD

        Input can be read in different formats as specified by the -i switch.
        The most common formats are csv and table (white-space-delimited).  In
        either of these formats, p.df can accomodate input data that either
        does or doesn not have a header row.  When no header row is indicated,
        The columns of the Dataframe will be labeled as c0, c1, ..., cN.

        Plotting methods invoked on a Dataframe generate no output, but
        create an interactive plot instead.  There are a number of plot
        specific options available at the command line that govern the details
        of how these plots are rendered (e.g. --xlim, --legend, etc).

        -----------------------------------------------------------------------
        Examples:

            * Print a csv file in nice tabular format
                p.example_data -d tips | p.df -o table | head

            * Select by row
                p.example_data -d tips \\
                | p.df 'df[df.sex=="Female"]' 'df[df.smoker=="Yes"]' -o table

            * Extract columns
                p.example_data -d tips \\
                | p.df 'df[["total_bill", "tip"]].head()' -o table

            * Perform grouped aggregations
                p.example_data -d tips | p.df \\
                'df.groupby(by=["sex", "smoker"]).tip.sum()' -o table index

            * Use pandas plotting methods
                p.example_data -d tips | p.df \\
                'df.groupby(by="day").total_bill.sum().plot(kind="barh")'\\
                --xlabel 'Dollars' --title 'Total Bills by Day'

            * Convert between tabular and csv format with/without header rows
                seq 10 | awk '{print $1, 2*$1}'\\
                | p.df --names a b -i table noheader | p.df -o table noheader

        -----------------------------------------------------------------------
        """
    )

    # build the parser and attach the standard pandashells option groups
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=msg)
    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating', 'example')

    statement_help = (
        '(MUST come before any options) '
        '[statement ...] Statement(s) to execute. '
    )
    parser.add_argument("statement", help=statement_help, nargs="*")
    args = parser.parse_args()

    # read stdin into the starting dataframe
    df = io_lib.df_from_input(args)

    # run each statement in sequence, threading df through;
    # plot commands are terminal statements so will call sys.exit()
    for stmt in args.statement:
        df = process_command(args, stmt, df)

    # emit whatever frame remains after the last statement
    io_lib.df_to_output(args, df)
def main():
    """Entry point for p.lomb_scargle: compute a spectrogram from stdin."""
    msg = textwrap.dedent("""
        Computes a spectrogram using the lomb-scargle algorithm provided by
        the gatspy module.  The input time series need not have evenly spaced
        time-stamps.  The FFT-based algorithm has complexity O[N*log(N)].

        -----------------------------------------------------------------------
        Examples:

            * Plot the spectrum of a simple sine wave
                  p.linspace 0 10 100 \\
                  | p.df 'df["value"] = 7 * np.sin(2*np.pi*df.time / 1.5)'\\
                        --names time\\
                  | p.lomb_scargle -t time -y value --interp_exp 3\\
                  | p.plot -x period -y amp --xlim 0 3

            * Show the annual and 59-day peaks in the sealevel spectrum
                p.example_data -d sealevel\\
                | p.df 'df["day"] = 365.25 * df.year'\\
                        'df["day"] = df.day - df.day.iloc[0]'\\
                | p.lomb_scargle -t day -y sealevel_mm --interp_exp 3\\
                | p.df 'df[df.period < 720]'\\
                | p.plot -x period -y amp --xlim 1 400\\
                         --title 'Sea-surface height spectrum'\\
                         --xlabel 'period (days)'

        -----------------------------------------------------------------------
        """)

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out')

    parser.add_argument('-t',
                        '--time_col',
                        help='Time Column',
                        nargs=1,
                        required=True,
                        type=str)

    parser.add_argument('-y',
                        '--observation_col',
                        help='Observation column',
                        nargs=1,
                        dest='val_col',
                        required=True,
                        type=str)

    parser.add_argument('--interp_exp',
                        help='Interpolate by this power of 2',
                        nargs=1,
                        type=int,
                        default=[1])
    # help string fix: 'freqency' -> 'frequency'
    parser.add_argument('--freq_order',
                        action='store_true',
                        dest='freq_order',
                        default=False,
                        help='Order output by frequency instead of period')

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe and compute its lomb-scargle spectrum
    df = io_lib.df_from_input(args)
    df = lomb_scargle_lib.lomb_scargle(df, args.time_col[0], args.val_col[0],
                                       args.interp_exp[0], args.freq_order)

    # write dataframe to output
    io_lib.df_to_output(args, df)
Example #26
0
def main():
    """Entry point for p.rand: sample a distribution and write a dataframe."""
    # Help text (typo fix: 'distrubtions' -> 'distributions')
    msg = textwrap.dedent("""
        Return random samples from common probability distributions.

        -----------------------------------------------------------------------
        Examples:

            uniform:  p.rand -n 1000 -t uniform  --min=0    --max=1   | p.hist
            normal:   p.rand -n 1000 -t normal   --mu=0     --sigma=1 | p.hist
            poisson:  p.rand -n 1000 -t poisson  --mu=1               | p.hist
            beta:     p.rand -n 1000 -t beta     --alpha=2  --beta=6  | p.hist
            gamma:    p.rand -n 1000 -t gamma    --alpha=1  --beta=1  | p.hist
            binomial: p.rand -n 1000 -t binomial --N=10     --p=0.4   | p.hist
        -----------------------------------------------------------------------
        """)

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    parser.add_argument(
        '-t',
        '--type',
        nargs=1,
        type=str,
        default=['uniform'],
        choices=['uniform', 'normal', 'beta', 'gamma', 'binomial', 'poisson'],
        help='type of distribution (default=\'uniform\')')
    parser.add_argument('-n',
                        '--num_samples',
                        nargs=1,
                        default=[10],
                        type=int,
                        help='The number of rows to generate (default=10)')
    parser.add_argument(
        '-c',
        '--columns',
        nargs=1,
        default=[1],
        type=int,
        help='The number of columns to generate per row (default=1)')
    parser.add_argument(
        '--N',
        nargs=1,
        default=[10],
        type=int,
        help=('(Binomial Dist) Largest possible value for random variable. '
              '(default=10)'))
    # help string fix: add the missing space before '(default=.5)'
    parser.add_argument(
        '--p',
        nargs=1,
        default=[.5],
        type=float,
        help=('(Binomial Dist) Bernoulli probability for each trial '
              '(default=.5)'))
    # help string fix: add the missing closing paren
    parser.add_argument(
        '--mu',
        nargs=1,
        type=float,
        help='(Normal, Poisson) Mean (defaults: normal:0, poisson:1)')
    parser.add_argument('--sigma',
                        nargs=1,
                        default=[1.],
                        type=float,
                        help='(Normal) standard deviation, (default: 1)')
    parser.add_argument('--min',
                        nargs=1,
                        default=[0.],
                        type=float,
                        help='(Uniform) Minimum value of range, (default: 0)')
    parser.add_argument('--max',
                        nargs=1,
                        default=[1.],
                        type=float,
                        help='(Uniform) Maximum value of range, (default: 1)')
    parser.add_argument('--alpha',
                        nargs=1,
                        default=[2.],
                        type=float,
                        help='(Beta, Gamma)  (default: 2)')
    parser.add_argument('--beta',
                        nargs=1,
                        default=[2.],
                        type=float,
                        help='(Beta, Gamma)  (default: 2)')

    arg_lib.add_args(parser, 'io_out')

    # parse arguments
    args = parser.parse_args()

    # the --mu default depends on distribution type, so fill it after parsing
    args = fill_default_mu(args)

    # draw the samples for the requested distribution
    df = get_samples(args)

    # write dataframe to output
    io_lib.df_to_output(args, df)
Example #27
0
def main():
    """Entry point for the faceted-plot (seaborn FacetGrid) CLI tool.

    Reads a dataframe from stdin, builds a FacetGrid keyed on the
    --row/--col/--hue columns, and maps a user-named plotting function
    (e.g. pl.scatter) over the facets.
    """
    msg = textwrap.dedent(
        """
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, "io_in")

    msg = "Different values of this variable in separate rows"
    parser.add_argument("--row", nargs=1, type=str, dest="row", metavar="row", help=msg)

    msg = "Different values of this variable in separate columns"
    parser.add_argument("--col", nargs=1, type=str, dest="col", metavar="col", help=msg)

    msg = "Different values of this variable in separate colors"
    parser.add_argument("--hue", nargs=1, type=str, dest="hue", metavar="hue", help=msg)

    msg = "The aspect ratio of each plot"
    parser.add_argument("--aspect", nargs=1, type=float, dest="aspect", metavar="aspect", default=[2], help=msg)

    msg = "The size of each plot (default=4)"
    parser.add_argument("--size", nargs=1, type=float, dest="size", metavar="size", help=msg, default=[4])

    msg = "The plotting function to use for each facet"
    parser.add_argument("--map", nargs=1, type=str, dest="map", metavar="map", required=True, help=msg)

    msg = "The args to pass to the plotting function"
    parser.add_argument("--args", nargs="+", type=str, dest="args", metavar="args", required=True, help=msg)

    msg = "Plotting function kwargs expressed as 'a=1' 'b=2' ... "
    parser.add_argument("--kwargs", nargs="+", type=str, dest="kwargs", metavar="kwargs", help=msg)

    msg = "Share x axis"
    parser.add_argument("--sharex", action="store_true", dest="sharex", default=False, help=msg)

    msg = "Share y axis"
    parser.add_argument("--sharey", action="store_true", dest="sharey", default=False, help=msg)

    msg = "x axis limits when sharex=True"
    parser.add_argument("--xlim", nargs=2, type=float, dest="xlim", metavar="xlim", help=msg)

    # fix: help text previously said "sharex=True" for the y-axis option
    msg = "y axis limits when sharey=True"
    parser.add_argument("--ylim", nargs=2, type=float, dest="ylim", metavar="ylim", help=msg)

    msg = "Save the figure to this file"
    parser.add_argument("--savefig", nargs=1, type=str, help=msg)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as single-element lists; unwrap with [0]
    facet_grid_kwargs = {
        "row": args.row[0] if args.row else None,
        "col": args.col[0] if args.col else None,
        "hue": args.hue[0] if args.hue else None,
        "aspect": args.aspect[0],
        "size": args.size[0],
        "sharex": args.sharex,
        "sharey": args.sharey,
        "xlim": args.xlim if args.xlim else None,
        "ylim": args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    map_func_name = args.map[0]

    # Resolve the user-supplied function name (e.g. "pl.scatter") within a
    # namespace restricted to the documented modules.
    # NOTE(review): exec of user-supplied strings is inherently unsafe for
    # untrusted input; acceptable here only because this is a local CLI tool.
    scope = {"pl": pl, "sns": sns, "map_func_name": map_func_name}
    exec("map_func = {}".format(map_func_name), scope)
    map_func = scope["map_func"]

    map_args = args.args

    # Each --kwargs token is a 'key=value' expression evaluated as python
    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            exec("map_kwargs.update(dict({}))".format(kwarg))

    grid.map(map_func, *map_args, **map_kwargs)  # noqa  defined in exec above
    grid.add_legend()
    plot_lib.show(args)
Example #28
0
def main():
    """Entry point for the single-variable regression-plot CLI tool.

    Reads a dataframe from stdin, fits a polynomial of the requested order
    with np.polyfit, and renders the data plus fit via seaborn's regplot.
    """
    msg = textwrap.dedent(
        """
        Create a single variable regression plot of specified order.

        -----------------------------------------------------------------------
        Examples:
            * Fit a line to synthetic data with bootstrap errors.
                p.linspace 0 10 20 \\
                | p.df 'df["y_true"] = .2 * df.x' \\
                       'df["noise"] = np.random.randn(20)' \\
                        'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y

            * Fit a quadratic to synthetic data with bootstrap errors.
                p.linspace 0 10 40 \\
                | p.df 'df["y_true"] = .5 * df.x  + .3 * df.x ** 2'\\
                       'df["noise"] = np.random.randn(40)' \\
                        'df["y"] = df.y_true + df.noise' --names x \\
                | p.regplot -x x -y y --order 2

            * Fit sealevel data with no bootstrap
                p.example_data -d sealevel\\
                | p.regplot -x year -y sealevel_mm --n_boot 1


        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')

    # fix: the -x/-y help strings were swapped; x is the independent variable
    msg = 'Column for independent variable'
    parser.add_argument('-x', nargs=1, type=str, dest='x', metavar='col',
                        help=msg, required=True)

    msg = 'Column for dependent variable'
    parser.add_argument('-y', nargs=1, type=str, dest='y',
                        metavar='col', help=msg, required=True)

    msg = 'The order of the polynomial to fit (default = 1)'
    parser.add_argument('--order', help=msg, nargs=1, default=[1], type=int)

    msg = 'Number of bootstrap samples for uncertainty region (default=1000)'
    parser.add_argument(
        '--n_boot', help=msg, nargs=1, default=[1000], type=int)

    parser.add_argument('-a', '--alpha', help='Set opacity',
                        nargs=1, default=[0.5], type=float)

    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # extract command line params (nargs=1 options arrive as lists)
    x = df[args.x[0]].values
    y = df[args.y[0]].values

    # do a polyfit with the specified order
    coeffs = np.polyfit(x, y, args.order[0])

    # args.savefig is presumably added by the 'decorating' arg group — verify
    label = make_label(coeffs, args.savefig)

    sns.regplot(
        x, y, order=args.order[0], n_boot=args.n_boot[0],
        line_kws={'label': label, 'color': CC[2], 'alpha': .5},
        scatter_kws={'alpha': args.alpha[0], 'color': CC[0]})

    pl.legend(loc='best')
    pl.xlabel(args.x[0])
    pl.ylabel(args.y[0])
    plot_lib.refine_plot(args)
    plot_lib.show(args)
Example #29
0
def get_input_args():
    """Build the argument parser for the histogram CLI tool and parse argv.

    Returns:
        argparse.Namespace: parsed command-line arguments.
    """
    # fix: typos in help text ("distriubtion", "sid-by-side", "Quiet mean")
    msg = textwrap.dedent("""
        Plot histograms from input data.  Can either plot just a single
        histogram or a grid of histograms with different columns of data.
        When multiple columns are specified, creates a grid of histograms,
        one for each specified column.

        -----------------------------------------------------------------------
        Examples:

            * Plot histogram of a beta distribution
                p.rand -t beta --alpha 3 --beta 10 -n 10000\\
                | p.hist --names beta -n 50

            * Plot a side-by-side comparison of a gamma and normal distribution
              paste <(p.rand -t normal  -n 10000 | p.df --names normal)\\
                    <(p.rand -t gamma   -n 10000 | p.df --names gamma)\\
              | p.hist -i table -c normal gamma
        -----------------------------------------------------------------------
        """)

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in', 'io_out', 'decorating')

    # specify columns to histogram
    parser.add_argument('-c',
                        '--cols',
                        help='Column(s) to histogram',
                        nargs='+')
    parser.add_argument(
        '-q',
        '--quiet',
        action='store_true',
        default=False,
        help='Quiet means no plots. Send numeric output to stdout instead')
    parser.add_argument('-n',
                        '--nbins',
                        help='Number of bins (default=30)',
                        nargs=1,
                        default=[30],
                        type=int)
    parser.add_argument('-r',
                        '--range',
                        help='Range (min max) of x axis',
                        nargs=2,
                        default=None,
                        type=float)
    parser.add_argument('-l',
                        '--layout',
                        help='Layout (rows, cols)',
                        nargs=2,
                        default=None,
                        type=int)
    parser.add_argument('-a',
                        '--alpha',
                        help='Set opacity of hist bars',
                        nargs=1,
                        default=[1.],
                        type=float)
    parser.add_argument('-d',
                        '--density',
                        action='store_true',
                        default=False,
                        help='Show probability density instead of counts')
    parser.add_argument('--sharex',
                        action='store_true',
                        default=False,
                        help='Make all x axes have the same range')
    parser.add_argument('--sharey',
                        action='store_true',
                        default=False,
                        help='Make all y axes have the same range')
    return parser.parse_args()
Example #30
0
def main():
    """Entry point for the faceted-plot (seaborn FacetGrid) CLI tool.

    Reads a dataframe from stdin, builds a FacetGrid keyed on the
    --row/--col/--hue columns, and maps a user-named plotting function
    (e.g. pl.scatter) over the facets.
    """
    msg = textwrap.dedent("""
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """)

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in')

    msg = 'Different values of this variable in separate rows'
    parser.add_argument('--row',
                        nargs=1,
                        type=str,
                        dest='row',
                        metavar='row',
                        help=msg)

    msg = 'Different values of this variable in separate columns'
    parser.add_argument('--col',
                        nargs=1,
                        type=str,
                        dest='col',
                        metavar='col',
                        help=msg)

    msg = 'Different values of this variable in separate colors'
    parser.add_argument('--hue',
                        nargs=1,
                        type=str,
                        dest='hue',
                        metavar='hue',
                        help=msg)

    msg = 'The aspect ratio of each plot'
    parser.add_argument('--aspect',
                        nargs=1,
                        type=float,
                        dest='aspect',
                        metavar='aspect',
                        default=[2],
                        help=msg)

    msg = 'The size of each plot (default=4)'
    parser.add_argument('--size',
                        nargs=1,
                        type=float,
                        dest='size',
                        metavar='size',
                        help=msg,
                        default=[4])

    msg = 'The plotting function to use for each facet'
    parser.add_argument('--map',
                        nargs=1,
                        type=str,
                        dest='map',
                        metavar='map',
                        required=True,
                        help=msg)

    msg = 'The args to pass to the plotting function'
    parser.add_argument('--args',
                        nargs='+',
                        type=str,
                        dest='args',
                        metavar='args',
                        required=True,
                        help=msg)

    msg = 'Plotting function kwargs expressed as \'a=1\' \'b=2\' ... '
    parser.add_argument('--kwargs',
                        nargs='+',
                        type=str,
                        dest='kwargs',
                        metavar='kwargs',
                        help=msg)

    msg = 'Share x axis'
    parser.add_argument('--sharex',
                        action='store_true',
                        dest='sharex',
                        default=False,
                        help=msg)

    msg = 'Share y axis'
    parser.add_argument('--sharey',
                        action='store_true',
                        dest='sharey',
                        default=False,
                        help=msg)

    msg = 'x axis limits when sharex=True'
    parser.add_argument('--xlim',
                        nargs=2,
                        type=float,
                        dest='xlim',
                        metavar='xlim',
                        help=msg)

    # fix: help text previously said "sharex=True" for the y-axis option
    msg = 'y axis limits when sharey=True'
    parser.add_argument('--ylim',
                        nargs=2,
                        type=float,
                        dest='ylim',
                        metavar='ylim',
                        help=msg)

    msg = "Save the figure to this file"
    parser.add_argument('--savefig', nargs=1, type=str, help=msg)

    warnings.filterwarnings('ignore')
    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as single-element lists; unwrap with [0]
    facet_grid_kwargs = {
        'row': args.row[0] if args.row else None,
        'col': args.col[0] if args.col else None,
        'hue': args.hue[0] if args.hue else None,
        'aspect': args.aspect[0],
        'size': args.size[0],
        'sharex': args.sharex,
        'sharey': args.sharey,
        'xlim': args.xlim if args.xlim else None,
        'ylim': args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    map_func_name = args.map[0]

    # Resolve the user-supplied function name (e.g. "pl.scatter") within a
    # namespace restricted to the documented modules.
    # NOTE(review): exec of user-supplied strings is inherently unsafe for
    # untrusted input; acceptable here only because this is a local CLI tool.
    scope = {'pl': pl, 'sns': sns, 'map_func_name': map_func_name}
    exec('map_func = {}'.format(map_func_name), scope)
    map_func = scope['map_func']

    map_args = args.args

    # Each --kwargs token is a 'key=value' expression evaluated as python
    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            exec('map_kwargs.update(dict({}))'.format(kwarg))

    grid.map(map_func, *map_args, **map_kwargs)  # noqa  defined in exec above
    grid.add_legend()
    plot_lib.show(args)
Example #31
0
def main():
    """Entry point for the parallel shell-command runner CLI tool.

    Reads one shell command per line from stdin and executes them
    concurrently via parallel_lib.parallel.
    """
    # fix: removed a dead two-line msg assignment that was immediately
    # overwritten by the dedent below; also fixed the "parrallel" typo
    msg = textwrap.dedent(
        """
        Read a list of commands from stdin and execute them in parallel.

        -----------------------------------------------------------------------
        Examples:

            * This line generates commands that will be used in the examples.
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader

            * Execute the commands one at a time, no parallelism
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 1

            * Execute all commands in parallel
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 10

            * Suppress stdout from processes and echo commands
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 10 -c -s stdout

            * Make a histogram of how long the individual jobs took
                time seq 100 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 50 -v \\
                | grep __job__ \\
                | p.df 'df.dropna()' 'df.duration_sec.hist(bins=20)'
        -----------------------------------------------------------------------
        """
    )

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    msg = "The number of jobs to run in parallel. If not supplied, will "
    msg += "default to the number of detected cores."
    parser.add_argument('--njobs', '-n', dest='njobs', default=[None],
                        nargs=1, type=int, help=msg)
    parser.add_argument("-v", "--verbose", action="store_true", default=False,
                        help="Enable verbose output")

    parser.add_argument("-c", "--show_commands", action="store_true",
                        default=False, help="Print commands to stdout")

    msg = "Suppress stdout, stderr, or both for all running jobs"
    parser.add_argument("-s", "--suppress",
                        choices=['stdout', 'stderr', 'both'], default=[None],
                        nargs=1, help=msg)

    # add standard arg groups
    arg_lib.add_args(parser, 'example')

    # parse arguments
    args = parser.parse_args()

    # get the commands from stdin
    cmd_list = sys.stdin.readlines()

    # get suppression vars from args (args.suppress is a 1-element list;
    # default [None] makes both membership tests False)
    suppress_stdout = 'stdout' in args.suppress or 'both' in args.suppress
    suppress_stderr = 'stderr' in args.suppress or 'both' in args.suppress

    # run the commands
    parallel_lib.parallel(
        cmd_list,
        njobs=args.njobs[0],
        verbose=args.verbose,
        suppress_cmd=(not args.show_commands),
        suppress_stdout=suppress_stdout,
        suppress_stderr=suppress_stderr,
        assume_hyperthread=True)
Example #32
0
def main():
    """Entry point for the faceted-plot (seaborn FacetGrid) CLI tool.

    Reads a dataframe from stdin, builds a FacetGrid keyed on the
    --row/--col/--hue columns, and maps a user-named plotting function
    (e.g. pl.scatter) over the facets.
    """
    msg = textwrap.dedent(
        """
        Creates faceted plots using seaborn FacetGrid.

        With this tool, you can create a group of plots which show aspects
        of the same dataset broken down in different ways.  See the seaborn
        FacetGrid documentation for more detail.

        The --map argument to this function specifies a function to use
        for generating each of the plots.  The following modules are available
        in the namespace:
            pl = pylab
            sns = seaborn
        -----------------------------------------------------------------------
        Examples:

            * Scatterplot of tips vs bill for different combinations of sex,
              smoker, and day of the week:
                    p.example_data -d tips | \\
                    p.facet_grid --row smoker --col sex --hue day \\
                    --map pl.scatter \\
                    --args total_bill tip --kwargs 'alpha=.2' 's=100'

            * Histogram of tips broken down by sex, smoker and day
                    p.example_data -d tips | p.facet_grid --col day \\
                    --row sex --hue smoker  --sharex --sharey --aspect 1 \\
                    --map pl.hist --args tip \\
                    --kwargs 'alpha=.2' 'range=[0, 10]' 'bins=20'
        -----------------------------------------------------------------------
        """
    )

    #  read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    arg_lib.add_args(parser, 'io_in')

    msg = 'Different values of this variable in separate rows'
    parser.add_argument(
        '--row', nargs=1, type=str, dest='row', metavar='row', help=msg)

    msg = 'Different values of this variable in separate columns'
    parser.add_argument(
        '--col', nargs=1, type=str, dest='col', metavar='col', help=msg)

    msg = 'Different values of this variable in separate colors'
    parser.add_argument(
        '--hue', nargs=1, type=str, dest='hue', metavar='hue', help=msg)

    msg = 'The aspect ratio of each plot'
    parser.add_argument(
        '--aspect', nargs=1, type=float, dest='aspect', metavar='aspect',
        default=[2], help=msg)

    msg = 'The size of each plot (default=4)'
    parser.add_argument(
        '--size', nargs=1, type=float, dest='size', metavar='size',
        help=msg, default=[4])

    msg = 'The plotting function to use for each facet'
    parser.add_argument(
        '--map', nargs=1, type=str, dest='map', metavar='map', required=True,
        help=msg)

    msg = 'The args to pass to the plotting function'
    parser.add_argument(
        '--args', nargs='+', type=str, dest='args', metavar='args',
        required=True, help=msg)

    msg = 'Plotting function kwargs expressed as \'a=1\' \'b=2\' ... '
    parser.add_argument(
        '--kwargs', nargs='+', type=str, dest='kwargs',
        metavar='kwargs', help=msg)

    msg = 'Share x axis'
    parser.add_argument('--sharex', action='store_true', dest='sharex',
                        default=False, help=msg)

    msg = 'Share y axis'
    parser.add_argument('--sharey', action='store_true', dest='sharey',
                        default=False, help=msg)

    msg = 'x axis limits when sharex=True'
    parser.add_argument(
        '--xlim', nargs=2, type=float, dest='xlim', metavar='xlim', help=msg)

    # fix: help text previously said "sharex=True" for the y-axis option
    msg = 'y axis limits when sharey=True'
    parser.add_argument(
        '--ylim', nargs=2, type=float, dest='ylim', metavar='ylim', help=msg)

    msg = "Save the figure to this file"
    parser.add_argument('--savefig', nargs=1, type=str, help=msg)

    warnings.filterwarnings('ignore')
    # parse arguments
    args = parser.parse_args()

    # get the input dataframe
    df = io_lib.df_from_input(args)

    # nargs=1 options arrive as single-element lists; unwrap with [0]
    facet_grid_kwargs = {
        'row': args.row[0] if args.row else None,
        'col': args.col[0] if args.col else None,
        'hue': args.hue[0] if args.hue else None,
        'aspect': args.aspect[0],
        'size': args.size[0],
        'sharex': args.sharex,
        'sharey': args.sharey,
        'xlim': args.xlim if args.xlim else None,
        'ylim': args.ylim if args.ylim else None,
    }
    grid = sns.FacetGrid(df, **facet_grid_kwargs)

    map_func_name = args.map[0]

    # Resolve the user-supplied function name (e.g. "pl.scatter") within a
    # namespace restricted to the documented modules.
    # NOTE(review): exec of user-supplied strings is inherently unsafe for
    # untrusted input; acceptable here only because this is a local CLI tool.
    scope = {'pl': pl, 'sns': sns, 'map_func_name': map_func_name}
    exec('map_func = {}'.format(map_func_name), scope)
    map_func = scope['map_func']

    map_args = args.args

    # Each --kwargs token is a 'key=value' expression evaluated as python
    map_kwargs = {}
    if args.kwargs:
        for kwarg in args.kwargs:
            exec('map_kwargs.update(dict({}))'.format(kwarg))

    grid.map(map_func, *map_args, **map_kwargs)  # noqa  defined in exec above
    grid.add_legend()
    plot_lib.show(args)
Example #33
0
def main():
    """Entry point for the parallel shell-command runner CLI tool.

    Reads one shell command per line from stdin and executes them
    concurrently via parallel_lib.parallel.
    """
    # fix: removed a dead two-line msg assignment that was immediately
    # overwritten by the dedent below; also fixed the "parrallel" typo
    msg = textwrap.dedent("""
        Read a list of commands from stdin and execute them in parallel.

        -----------------------------------------------------------------------
        Examples:

            * This line generates commands that will be used in the examples.
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader

            * Execute the commands one at a time, no parallelism
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 1

            * Execute all commands in parallel
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 10

            * Suppress stdout from processes and echo commands
                time seq 10 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 10 -c -s stdout

            * Make a histogram of how long the individual jobs took
                time seq 100 \\
                | p.format -t 'sleep 1; echo done {n}' --names n -i noheader \\
                | p.parallel -n 50 -v \\
                | grep __job__ \\
                | p.df 'df.dropna()' 'df.duration_sec.hist(bins=20)'
        -----------------------------------------------------------------------
        """)

    # read command line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, description=msg)

    msg = "The number of jobs to run in parallel. If not supplied, will "
    msg += "default to the number of detected cores."
    parser.add_argument('--njobs',
                        '-n',
                        dest='njobs',
                        default=[None],
                        nargs=1,
                        type=int,
                        help=msg)
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        default=False,
                        help="Enable verbose output")

    parser.add_argument("-c",
                        "--show_commands",
                        action="store_true",
                        default=False,
                        help="Print commands to stdout")

    msg = "Suppress stdout, stderr, or both for all running jobs"
    parser.add_argument("-s",
                        "--suppress",
                        choices=['stdout', 'stderr', 'both'],
                        default=[None],
                        nargs=1,
                        help=msg)

    # add standard arg groups
    arg_lib.add_args(parser, 'example')

    # parse arguments
    args = parser.parse_args()

    # get the commands from stdin
    cmd_list = sys.stdin.readlines()

    # get suppression vars from args (args.suppress is a 1-element list;
    # default [None] makes both membership tests False)
    suppress_stdout = 'stdout' in args.suppress or 'both' in args.suppress
    suppress_stderr = 'stderr' in args.suppress or 'both' in args.suppress

    # run the commands
    parallel_lib.parallel(cmd_list,
                          njobs=args.njobs[0],
                          verbose=args.verbose,
                          suppress_cmd=(not args.show_commands),
                          suppress_stdout=suppress_stdout,
                          suppress_stderr=suppress_stderr,
                          assume_hyperthread=True)