def test_three_pass_with_ref(self):
     """Sigma-edit a series riding on a ramp and check it ends up in bounds.

     The 'ser' column is the sample (-4, -4, repeated 0/1/2, 5) with its
     mean removed and the 0..N-1 ramp from 'ref' added.  Only 'ser' is
     passed to sigma_edit_dataframe; the edited column must then satisfy
     all_in_bounds at the same threshold.
     """
     threshold = 2
     sample = pd.Series([-4, -4] + [0, 1, 2] * 4 + [5])
     ramp = pd.Series(range(len(sample)))
     frame = pd.DataFrame({'ser': sample - sample.mean() + ramp, 'ref': ramp})
     edited = sigma_edit_dataframe(threshold, ['ser'], frame)
     self.assertTrue(all_in_bounds(threshold, edited['ser']))
 def test_three_pass_with_ref(self):
     """Check that a sigma-edited column stays within the threshold.

     Builds a two-column frame: 'ref' is the ramp 0..N-1 and 'ser' is a
     demeaned sample added on top of that ramp.  After editing 'ser' with
     sigma_edit_dataframe, all_in_bounds must hold for that column.
     """
     n_sigma = 2
     values = [-4, -4] + [0, 1, 2] * 4 + [5]
     base = pd.Series(values)
     reference = pd.Series(range(len(base)))
     shifted = base - base.mean() + reference
     table = pd.DataFrame({'ser': shifted, 'ref': reference})
     result = sigma_edit_dataframe(n_sigma, ['ser'], table)
     within = all_in_bounds(n_sigma, result['ser'])
     self.assertTrue(within)
Example #3
0
def main():
    """Command-line entry point: sigma-edit columns of an input DataFrame.

    Builds the argument parser, loads a DataFrame through
    ``io_lib.df_from_input``, runs the requested columns through
    ``outlier_lib.sigma_edit_dataframe`` and hands the result to
    ``io_lib.df_to_output``.
    """
    description = textwrap.dedent(
        """
        Remove outliers from DataFrame columns using a recursive sigma-edit
        algorithm.  The algorithm will recursively NaN out values greater than
        sigma_thresh standard deviations away from sample mean.

        -----------------------------------------------------------------------
        Examples:

            * Do a 2.5-sigma edit on a gamma distribution and show histogram
                p.rand -n 1000 -t gamma --alpha=3 --beta=.01\\
                | p.df 'df["c1"] = df.c0'\\
                | p.sig_edit -c c1 -t 2.5\\
                | p.df 'pd.melt(df)' --names raw edited\\
                | p.facet_grid --hue variable --map pl.hist\\
                   --args value --kwargs 'alpha=.2' 'range=[0, 1000]' 'bins=50'
        -----------------------------------------------------------------------
        """
    )

    # (flags, keyword options) for each tool-specific option, listed in the
    # order they are registered with the parser
    option_table = (
        (("-t", "--sigma_thresh"),
         dict(help="Sigma threshold", nargs=1, required=True, type=float)),
        (("-c", "--cols"),
         dict(required=True, help="Column(s) to sigma-edit", nargs="+")),
        (("--max_iter",),
         dict(help="Max number of recursions", nargs=1, type=int,
              default=[20])),
    )

    # set up the command line parser
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # shared option groups come first (presumably the common input/output
    # handling -- see arg_lib), followed by the tool-specific options
    arg_lib.add_args(parser, 'io_in', 'io_out', 'example')
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    # load the input dataframe
    frame = io_lib.df_from_input(args)

    # nargs=1 makes argparse store one-element lists, hence the [0]
    threshold = args.sigma_thresh[0]
    iteration_cap = args.max_iter[0]
    edited = outlier_lib.sigma_edit_dataframe(
        threshold, args.cols, frame, max_iter=iteration_cap)

    # emit the edited dataframe
    io_lib.df_to_output(args, edited)
Example #4
0
def main():
    """Run the sigma-edit tool from the command line.

    Parses ``-t/--sigma_thresh``, ``-c/--cols`` and ``--max_iter`` (plus the
    options registered by ``arg_lib.add_args``), reads the input DataFrame,
    applies ``outlier_lib.sigma_edit_dataframe`` to the selected columns and
    writes the result with ``io_lib.df_to_output``.
    """
    help_text = textwrap.dedent("""
        Remove outliers from DataFrame columns using a recursive sigma-edit
        algorithm.  The algorithm will recursively NaN out values greater than
        sigma_thresh standard deviations away from sample mean.

        -----------------------------------------------------------------------
        Examples:

            * Do a 2.5-sigma edit on a gamma distribution and show histogram
                p.rand -n 1000 -t gamma --alpha=3 --beta=.01\\
                | p.df 'df["c1"] = df.c0'\\
                | p.sig_edit -c c1 -t 2.5\\
                | p.df 'pd.melt(df)' --names raw edited\\
                | p.facet_grid --hue variable --map pl.hist\\
                   --args value --kwargs 'alpha=.2' 'range=[0, 1000]' 'bins=50'
        -----------------------------------------------------------------------
        """)

    # build the command line interface
    cli = argparse.ArgumentParser(
        description=help_text,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    arg_lib.add_args(cli, 'io_in', 'io_out', 'example')

    cli.add_argument(
        "-t", "--sigma_thresh",
        type=float,
        nargs=1,
        required=True,
        help="Sigma threshold",
    )
    cli.add_argument(
        "-c", "--cols",
        nargs="+",
        required=True,
        help="Column(s) to sigma-edit",
    )
    cli.add_argument(
        "--max_iter",
        type=int,
        nargs=1,
        default=[20],
        help="Max number of recursions",
    )

    args = cli.parse_args()

    # read the input dataframe
    source_df = io_lib.df_from_input(args)

    # single-valued options arrive as one-element lists because of nargs=1
    sigma_thresh = args.sigma_thresh[0]
    max_iter = args.max_iter[0]
    edited_df = outlier_lib.sigma_edit_dataframe(
        sigma_thresh, args.cols, source_df, max_iter=max_iter)

    # send the edited dataframe to the output
    io_lib.df_to_output(args, edited_df)