예제 #1
0
    def test_summarize_by_array(self):
        """String-form and list-form summarize calls yield the same frame."""
        frame = create_df()
        frame["G"] = ["G1", "G1", "G1", "G2", "G2"]
        frame["H"] = ["H1", "H2", "H2", "H3", "H3"]
        wrapped = Wrap(frame)

        # The same aggregation, once as a single expression string and once
        # as separate lists of aggregates and grouping columns.
        from_string = wrapped.summarize("count(), max(B) by G, H")
        from_lists = wrapped.summarize(["count()", "max(B)"], ["G", "H"])

        for result in (from_string, from_lists):
            print(result.df)

        pd.testing.assert_frame_equal(from_string.df, from_lists.df)
예제 #2
0
    def test_summarize_var_noby(self):
        """variance(F) without grouping yields a single variance_F column."""
        frame = create_df()
        frame["F"] = [1, 1, 2, 3, 3]

        summary = Wrap(frame).summarize("variance(F)")

        columns = list(summary.df.columns)
        self.assertListEqual(["variance_F"], columns)

        values = list(summary.df["variance_F"])
        self.assertListEqual([1], values)
예제 #3
0
    def test_summarize_countif_noby(self):
        """countif(F > 2) aliased to C counts the rows matching the predicate."""
        frame = create_df()
        frame["F"] = [1, 1, 2, 4, 3]

        summary = Wrap(frame).summarize("C=countif(F > 2)")

        columns = list(summary.df.columns)
        self.assertListEqual(["C"], columns)

        # Only the values 4 and 3 exceed 2.
        counts = list(summary.df["C"])
        self.assertListEqual([2], counts)
예제 #4
0
    def test_summarize_avg_noby(self):
        """avg(F) without grouping yields a single avg_F column."""
        frame = create_df()
        frame["F"] = [1, 1, 2, 3, 3]

        summary = Wrap(frame).summarize("avg(F)")

        columns = list(summary.df.columns)
        self.assertListEqual(["avg_F"], columns)

        averages = list(summary.df["avg_F"])
        self.assertListEqual([2], averages)
예제 #5
0
    def test_summarize_min_noby(self):
        """min(F) without grouping yields a single min_F column."""
        frame = create_df()
        frame["F"] = [1, 8, 2, 3, 3]

        summary = Wrap(frame).summarize("min(F)")

        columns = list(summary.df.columns)
        self.assertListEqual(["min_F"], columns)

        minima = list(summary.df["min_F"])
        self.assertListEqual([1], minima)
예제 #6
0
    def test_summarize_percentile(self):
        """percentiles(B, 50, 75) by G yields one column per percentile."""
        wrapped = Wrap(create_df())
        summary = wrapped.summarize(["percentiles(B, 50, 75)"], "G")

        print(summary.df)

        # One (percentile column, expected per-group values) pair per check.
        median = list(summary.df["percentiles_B_50"])
        self.assertListEqual([1.0, 3.0], median)

        upper = list(summary.df["percentiles_B_75"])
        self.assertListEqual([2.0, 3.5], upper)
예제 #7
0
    def test_summarize_percentile_one_arg(self):
        """An aliased single percentile by G lands in the alias column."""
        wrapped = Wrap(create_df())
        summary = wrapped.summarize(["myperc = percentiles(B, 50)"], "G")

        print(summary.df)

        values = list(summary.df["myperc"])
        self.assertListEqual([1.0, 3.0], values)

        columns = list(summary.df.columns)
        self.assertListEqual(["G", "myperc"], columns)
예제 #8
0
    def test_summarize_avg(self):
        """avg(F) grouped by G yields the per-group mean in avg_F."""
        frame = create_df()
        frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
        frame["F"] = [1, 1, 3, 4, 3]

        summary = Wrap(frame).summarize(["avg(F)"], "G")

        # G1 -> mean(1, 1, 4) == 2, G2 -> mean(3, 3) == 3.
        averages = list(summary.df["avg_F"])
        self.assertListEqual([2, 3], averages)

        columns = list(summary.df.columns)
        self.assertListEqual(["G", "avg_F"], columns)
예제 #9
0
    def test_summarize_dcount_noby(self):
        """dcount(G) without grouping yields a single dcount_G column."""
        summary = Wrap(create_df()).summarize("dcount(G)")

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        self.assertListEqual(["dcount_G"], columns)

        counts = list(summary.df["dcount_G"])
        self.assertListEqual([2], counts)
예제 #10
0
def test_summarize_make_list_no_by():
    """make_list(A) without grouping collects every A value in one cell."""
    frame = pd.DataFrame()
    frame["G"] = [1, 1, 1, 2, 2]
    frame["A"] = [4, 4, 5, 6, 6]

    summary = Wrap(frame).summarize("make_list(A)")

    assert ["make_list_A"] == list(summary.df.columns)

    # The single result row holds all values, duplicates included.
    first_cell = summary.df["make_list_A"][0]
    assert [4, 4, 5, 6, 6] == first_cell
예제 #11
0
    def test_summarize_max_noby(self):
        """max(B) without grouping yields a single max_B column."""
        summary = Wrap(create_df()).summarize("max(B)")

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        self.assertListEqual(["max_B"], columns)

        maxima = list(summary.df["max_B"])
        self.assertListEqual([4], maxima)
예제 #12
0
    def test_summarize_count_noby(self):
        """count() without grouping yields a single count_ column."""
        summary = Wrap(create_df()).summarize("count()")

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        assert ["count_"] == columns

        counts = list(summary.df["count_"])
        assert [5] == counts
예제 #13
0
    def test_summarize_any_nonull(self):
        """any(B) by G is expected to return a non-null B for each group."""
        frame = create_df()
        frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
        # The first G1 row has a null B.
        frame["B"] = [None, 1, 2, 3, 4]

        summary = Wrap(frame).summarize("any(B) by G")

        groups = list(summary.df["G"])
        self.assertListEqual(["G1", "G2"], groups)

        picked = list(summary.df["any_B"])
        self.assertListEqual([1, 2], picked)

        self.assertEqual(2, len(summary.df.columns))
예제 #14
0
    def test_summarize_percentile_singlepercentile_noby(self):
        """A single ungrouped percentile yields one percentiles_B_50 column."""
        wrapped = Wrap(create_df())
        summary = wrapped.summarize(["percentiles(B, 50)"])

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        self.assertListEqual(["percentiles_B_50"], columns)

        median = list(summary.df["percentiles_B_50"])
        self.assertListEqual([2.0], median)
예제 #15
0
 def test_summarize_bin(self):
     """count() grouped by bin(D, 1d) buckets timestamps per day."""
     frame = create_df()
     timestamps = [
         "2009-01-01T08:20", "2009-01-02T08:51", "2009-01-01", "2009-01-06",
         "2009-01-01T22:00"
     ]
     frame["D"] = pd.to_datetime(timestamps)

     binned = Wrap(frame).summarize("Z=count()", "bin(D, 1d)")

     # Three timestamps fall on Jan 1, one on Jan 2 and one on Jan 6.
     counts = list(binned.df["Z"])
     self.assertListEqual([3, 1, 1], counts)

     columns = list(binned.df.columns)
     self.assertListEqual(["bin_D", "Z"], columns)
예제 #16
0
    def test_summarize_any_noby(self):
        """any(F) without grouping yields a single any_F column."""
        frame = create_df()
        frame["F"] = [8, 1, 2, 3, 3]

        summary = Wrap(frame).summarize("any(F)")

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        assert ["any_F"] == columns

        # The expected value is the first entry of F.
        picked = list(summary.df["any_F"])
        assert [8] == picked
예제 #17
0
    def test_summarize_std(self):
        """stdev(F) grouped by G yields the per-group deviation in stdev_F."""
        frame = create_df()
        frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
        frame["F"] = [1, 1, 3, 4, 3]

        summary = Wrap(frame).summarize(["stdev(F)"], "G")

        print(summary.df)

        # Compared to three decimals: G1 -> ~1.732, G2 -> 0.
        expected = [1.732051, 0]
        np.testing.assert_almost_equal(
            summary.df["stdev_F"], expected, decimal=3
        )

        columns = list(summary.df.columns)
        self.assertListEqual(["G", "stdev_F"], columns)
예제 #18
0
def test_summarize_varianceif_noby():
    """varianceif(B, C) is checked against B restricted to rows where C holds."""
    frame = pd.DataFrame()
    frame["B"] = [None, 10, 11, 1, 0]
    frame["C"] = [True, False, False, True, True]

    summary = Wrap(frame).summarize("varianceif(B, C)")

    print()
    print(summary)

    # Compared to three decimals against 0.5.
    np.testing.assert_almost_equal(
        summary.df["varianceif_B_C"], [0.5], decimal=3
    )
    assert 1 == len(summary.df.columns)
예제 #19
0
    def test_summarize_argmax_noby(self):
        """argmax(F, C) without grouping yields a single argmax_F_C column."""
        frame = create_df()
        frame["F"] = [8, 1, 2, 3, 3]

        summary = Wrap(frame).summarize("argmax(F, C)")

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        self.assertListEqual(["argmax_F_C"], columns)

        picked = list(summary.df["argmax_F_C"])
        self.assertListEqual(["foo1"], picked)
예제 #20
0
def test_summarize_anyif_noby_allnull():
    """anyif(B, C) over an all-null B column is expected to return None."""
    frame = pd.DataFrame()
    frame["B"] = [None, None, None, None, None]
    frame["C"] = [True, False, False, True, True]

    summary = Wrap(frame).summarize("anyif(B, C)")

    print()
    print(summary)

    picked = list(summary.df["anyif_B_C"])
    assert [None] == picked
    assert 1 == len(summary.df.columns)
예제 #21
0
def test_summarize_make_set():
    """make_set(A) by G collects the distinct A values of each group."""
    frame = pd.DataFrame()
    frame["G"] = [1, 1, 1, 2, 2]
    frame["A"] = [4, 4, 5, 6, 6]

    summary = Wrap(frame).summarize("make_set(A) by G")

    assert ["G", "make_set_A"] == list(summary.df.columns)

    # Group 1 holds 4, 4, 5 and group 2 holds 6, 6.
    assert {4, 5} == summary.df["make_set_A"][0]
    assert {6} == summary.df["make_set_A"][1]
예제 #22
0
    def test_summarize(self):
        """An aliased count() grouped by G equals the expected frame."""
        summary = Wrap(create_df()).summarize("x=count()", "G")

        expected_rows = {
            "G": ["G1", "G2"],
            "x": [3, 2],
        }
        expected = pd.DataFrame(expected_rows)

        matches = summary.df.equals(expected)
        self.assertTrue(matches)
예제 #23
0
def test_summarize_minif_noby():
    """minif(B, C) is checked against B restricted to rows where C holds."""
    frame = pd.DataFrame()
    frame["B"] = [None, 10, -11, 1, 3]
    frame["C"] = [True, False, False, True, True]

    summary = Wrap(frame).summarize("minif(B, C)")

    print()
    print(summary)

    # -11 is the overall minimum but its C flag is False, so 1 is expected.
    minima = list(summary.df["minif_B_C"])
    assert [1] == minima
    assert 1 == len(summary.df.columns)
예제 #24
0
    def test_summarize_max_count_noby(self):
        """Two aliased aggregates without grouping yield two named columns."""
        summary = Wrap(create_df()).summarize("M=max(B), C=count()")

        print()
        print(summary.df)

        columns = list(summary.df.columns)
        self.assertListEqual(["M", "C"], columns)

        maxima = list(summary.df["M"])
        self.assertListEqual([4], maxima)

        counts = list(summary.df["C"])
        self.assertListEqual([5], counts)
예제 #25
0
    def test_summarize_max(self):
        """max(F) grouped by G yields the per-group maximum in max_F."""
        df = create_df()
        df["G"] = ["G1", "G1", "G2", "G1", "G2"]
        df["F"] = [1, 0, 9, 4, 8]

        w = Wrap(df)
        wnew = w.summarize(["max(F)"], "G")

        # Print the summarized result (the frame under test), not the input.
        print(wnew.df)

        # G1 -> max(1, 0, 4) == 4, G2 -> max(9, 8) == 9.  Expected value
        # goes first, matching the column assertion below.
        self.assertListEqual([4, 9], list(wnew.df["max_F"]))
        self.assertListEqual(["G", "max_F"], list(wnew.df.columns))
예제 #26
0
def test_summarize_make_list_if():
    """make_list_if(A, B) by G keeps only A values whose B flag is True."""
    frame = pd.DataFrame()
    frame["G"] = [1, 1, 1, 2, 2]
    frame["A"] = [4, 4, 5, 6, 6]
    # The first row is filtered out by its False flag.
    frame["B"] = [False, True, True, True, True]

    summary = Wrap(frame).summarize("make_list_if(A, B) by G")

    assert ["G", "make_list_if_A_B"] == list(summary.df.columns)
    assert [4, 5] == summary.df["make_list_if_A_B"][0]
    assert [6, 6] == summary.df["make_list_if_A_B"][1]
예제 #27
0
def test_summarize_composit_argument_method_noby():
    """An unaliased nested expression is read back from column Column1."""
    frame = pd.DataFrame()
    frame["A"] = ["HI", "HI", "HI", "HI"]
    frame["G"] = [1, 1, 1, 0]

    summary = Wrap(frame).summarize("tolower(any(A))")

    print()
    print(summary)

    lowered = list(summary.df["Column1"])
    assert ["hi"] == lowered
    assert 1 == len(summary.df.columns)
예제 #28
0
    def test_summarize_any_2args(self):
        """any(B, C) by G yields one any_<column> column per argument."""
        frame = create_df()
        frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
        frame["B"] = [None, 10, 20, 30, 40]
        frame["C"] = [0, None, None, 3, 4]

        summary = Wrap(frame).summarize("any(B, C) by G")

        groups = list(summary.df["G"])
        self.assertListEqual(["G1", "G2"], groups)

        # Expected values come from rows where both B and C are non-null.
        picked_b = list(summary.df["any_B"])
        self.assertListEqual([30, 40], picked_b)

        picked_c = list(summary.df["any_C"])
        self.assertListEqual([3, 4], picked_c)

        self.assertEqual(3, len(summary.df.columns))
예제 #29
0
    def test_summarize_argmax(self):
        """argmax(F, C) grouped by G yields one argmax_F_C value per group."""
        frame = create_df()
        frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
        frame["F"] = [1, 0, 9, 4, 8]

        summary = Wrap(frame).summarize(["argmax(F, C)"], "G")

        print(summary.df)

        picked = list(summary.df["argmax_F_C"])
        self.assertListEqual(picked, ["foo4", "foo3"])

        columns = list(summary.df.columns)
        self.assertListEqual(["G", "argmax_F_C"], columns)
예제 #30
0
    def test_summarize_any_star_noby(self):
        """any(*) without grouping yields one any_<column> per input column."""
        frame = pd.DataFrame()
        frame["B"] = [None, 10, 20, 30, 40]
        frame["C"] = [0, None, None, 3, 4]

        summary = Wrap(frame).summarize("any(*)")

        print()
        print(summary)

        # Expected values come from the first row with both B and C non-null.
        picked_b = list(summary.df["any_B"])
        self.assertListEqual([30], picked_b)

        picked_c = list(summary.df["any_C"])
        self.assertListEqual([3], picked_c)

        self.assertEqual(2, len(summary.df.columns))