def test_summarize_by_array(self):
    """String-form and list-form ``summarize`` calls must produce equal frames."""
    frame = create_df()
    frame["G"] = ["G1", "G1", "G1", "G2", "G2"]
    frame["H"] = ["H1", "H2", "H2", "H3", "H3"]
    wrapped = Wrap(frame)

    # The same aggregation expressed two ways: a single query string versus
    # separate lists of aggregates and group-by columns.
    from_string = wrapped.summarize("count(), max(B) by G, H")
    from_arrays = wrapped.summarize(["count()", "max(B)"], ["G", "H"])

    print(from_string.df)
    print(from_arrays.df)
    pd.testing.assert_frame_equal(from_string.df, from_arrays.df)
def test_summarize_var_noby(self):
    """``variance(F)`` without a by-clause yields a single ``variance_F`` value."""
    frame = create_df()
    frame["F"] = [1, 1, 2, 3, 3]

    summary = Wrap(frame).summarize("variance(F)")

    column_names = list(summary.df.columns)
    variances = list(summary.df["variance_F"])
    self.assertListEqual(["variance_F"], column_names)
    self.assertListEqual([1], variances)
def test_summarize_countif_noby(self):
    """A named ``countif`` without a by-clause counts the rows matching the predicate."""
    frame = create_df()
    frame["F"] = [1, 1, 2, 4, 3]

    summary = Wrap(frame).summarize("C=countif(F > 2)")

    # Two of the F values (4 and 3) are greater than 2.
    column_names = list(summary.df.columns)
    counts = list(summary.df["C"])
    self.assertListEqual(["C"], column_names)
    self.assertListEqual([2], counts)
def test_summarize_avg_noby(self):
    """``avg(F)`` without a by-clause yields a single ``avg_F`` value."""
    frame = create_df()
    frame["F"] = [1, 1, 2, 3, 3]

    summary = Wrap(frame).summarize("avg(F)")

    column_names = list(summary.df.columns)
    averages = list(summary.df["avg_F"])
    self.assertListEqual(["avg_F"], column_names)
    self.assertListEqual([2], averages)
def test_summarize_min_noby(self):
    """``min(F)`` without a by-clause yields a single ``min_F`` value."""
    frame = create_df()
    frame["F"] = [1, 8, 2, 3, 3]

    summary = Wrap(frame).summarize("min(F)")

    column_names = list(summary.df.columns)
    minimums = list(summary.df["min_F"])
    self.assertListEqual(["min_F"], column_names)
    self.assertListEqual([1], minimums)
def test_summarize_percentile(self):
    """``percentiles(B, 50, 75)`` by G yields one column per requested percentile."""
    summary = Wrap(create_df()).summarize(["percentiles(B, 50, 75)"], "G")
    print(summary.df)

    # Result columns are expected to be named percentiles_B_<percentile>.
    p50 = list(summary.df["percentiles_B_50"])
    self.assertListEqual([1.0, 3.0], p50)
    p75 = list(summary.df["percentiles_B_75"])
    self.assertListEqual([2.0, 3.5], p75)
def test_summarize_percentile_one_arg(self):
    """A named single-percentile aggregate keeps the user-supplied column name."""
    summary = Wrap(create_df()).summarize(["myperc = percentiles(B, 50)"], "G")
    print(summary.df)

    medians = list(summary.df["myperc"])
    self.assertListEqual([1.0, 3.0], medians)
    column_names = list(summary.df.columns)
    self.assertListEqual(["G", "myperc"], column_names)
def test_summarize_avg(self):
    """``avg(F)`` grouped by G yields one average per group."""
    frame = create_df()
    frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
    frame["F"] = [1, 1, 3, 4, 3]

    summary = Wrap(frame).summarize(["avg(F)"], "G")

    # G1 holds F values 1, 1, 4 (mean 2); G2 holds 3, 3 (mean 3).
    averages = list(summary.df["avg_F"])
    self.assertListEqual([2, 3], averages)
    column_names = list(summary.df.columns)
    self.assertListEqual(["G", "avg_F"], column_names)
def test_summarize_dcount_noby(self):
    """``dcount(G)`` without a by-clause yields a single ``dcount_G`` value."""
    summary = Wrap(create_df()).summarize("dcount(G)")

    print()
    print(summary.df)

    column_names = list(summary.df.columns)
    distinct_counts = list(summary.df["dcount_G"])
    self.assertListEqual(["dcount_G"], column_names)
    self.assertListEqual([2], distinct_counts)
def test_summarize_make_list_no_by():
    """``make_list(A)`` without a by-clause collects every A value into one list."""
    frame = pd.DataFrame()
    frame["G"] = [1, 1, 1, 2, 2]
    frame["A"] = [4, 4, 5, 6, 6]

    summary = Wrap(frame).summarize("make_list(A)")

    column_names = list(summary.df.columns)
    assert ["make_list_A"] == column_names
    # Duplicates are expected to be kept, in the order they appear in A.
    collected = summary.df["make_list_A"][0]
    assert [4, 4, 5, 6, 6] == collected
def test_summarize_max_noby(self):
    """``max(B)`` without a by-clause yields a single ``max_B`` value."""
    summary = Wrap(create_df()).summarize("max(B)")

    print()
    print(summary.df)

    column_names = list(summary.df.columns)
    maximums = list(summary.df["max_B"])
    self.assertListEqual(["max_B"], column_names)
    self.assertListEqual([4], maximums)
def test_summarize_count_noby(self):
    """``count()`` without a by-clause yields a single ``count_`` value.

    Uses ``self.assertListEqual`` rather than bare ``assert`` so the checks
    still run under ``python -O`` and match the sibling test methods.
    """
    df = create_df()
    w = Wrap(df)
    c = w.summarize("count()")
    print()
    print(c.df)
    self.assertListEqual(["count_"], list(c.df.columns))
    self.assertListEqual([5], list(c.df["count_"]))
def test_summarize_any_nonull(self):
    """``any(B) by G`` is expected to return a non-null B value for each group."""
    frame = create_df()
    frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
    # The first G1 row has a null B; the expected G1 result is 1, not None.
    frame["B"] = [None, 1, 2, 3, 4]

    summary = Wrap(frame).summarize("any(B) by G")

    groups = list(summary.df["G"])
    picked = list(summary.df["any_B"])
    self.assertListEqual(["G1", "G2"], groups)
    self.assertListEqual([1, 2], picked)
    self.assertEqual(2, len(summary.df.columns))
def test_summarize_percentile_singlepercentile_noby(self):
    """A single unnamed percentile without a by-clause yields ``percentiles_B_50``."""
    summary = Wrap(create_df()).summarize(["percentiles(B, 50)"])

    print()
    print(summary.df)

    column_names = list(summary.df.columns)
    medians = list(summary.df["percentiles_B_50"])
    self.assertListEqual(["percentiles_B_50"], column_names)
    self.assertListEqual([2.0], medians)
def test_summarize_bin(self):
    """Grouping by ``bin(D, 1d)`` counts rows per one-day bucket."""
    timestamps = [
        "2009-01-01T08:20",
        "2009-01-02T08:51",
        "2009-01-01",
        "2009-01-06",
        "2009-01-01T22:00",
    ]
    frame = create_df()
    frame["D"] = pd.to_datetime(timestamps)

    summary = Wrap(frame).summarize("Z=count()", "bin(D, 1d)")

    # Three timestamps fall on 2009-01-01, one on 01-02 and one on 01-06.
    counts = list(summary.df["Z"])
    self.assertListEqual([3, 1, 1], counts)
    column_names = list(summary.df.columns)
    self.assertListEqual(["bin_D", "Z"], column_names)
def test_summarize_any_noby(self):
    """``any(F)`` without a by-clause yields a single ``any_F`` value.

    The expected value is 8, the first entry of F. Uses
    ``self.assertListEqual`` rather than bare ``assert`` so the checks still
    run under ``python -O`` and match the sibling test methods.
    """
    df = create_df()
    df["F"] = [8, 1, 2, 3, 3]
    w = Wrap(df)
    c = w.summarize("any(F)")
    print()
    print(c.df)
    self.assertListEqual(["any_F"], list(c.df.columns))
    self.assertListEqual([8], list(c.df["any_F"]))
def test_summarize_std(self):
    """``stdev(F)`` grouped by G yields one standard deviation per group."""
    frame = create_df()
    frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
    frame["F"] = [1, 1, 3, 4, 3]

    summary = Wrap(frame).summarize(["stdev(F)"], "G")
    print(summary.df)

    # Compared to three decimal places: G1 holds 1, 1, 4 and G2 holds 3, 3.
    expected_stdev = [1.732051, 0]
    np.testing.assert_almost_equal(summary.df["stdev_F"], expected_stdev, decimal=3)
    column_names = list(summary.df.columns)
    self.assertListEqual(["G", "stdev_F"], column_names)
def test_summarize_varianceif_noby():
    """``varianceif(B, C)`` without a by-clause yields ``varianceif_B_C``."""
    frame = pd.DataFrame()
    frame["B"] = [None, 10, 11, 1, 0]
    frame["C"] = [True, False, False, True, True]

    summary = Wrap(frame).summarize("varianceif(B, C)")

    print()
    print(summary)

    # Rows where C is True carry B values None, 1 and 0; expected variance is 0.5.
    np.testing.assert_almost_equal(summary.df["varianceif_B_C"], [0.5], decimal=3)
    column_count = len(summary.df.columns)
    assert 1 == column_count
def test_summarize_argmax_noby(self):
    """``argmax(F, C)`` without a by-clause yields a single ``argmax_F_C`` value."""
    frame = create_df()
    # The maximum of F sits in the first row.
    frame["F"] = [8, 1, 2, 3, 3]

    summary = Wrap(frame).summarize("argmax(F, C)")

    print()
    print(summary.df)

    column_names = list(summary.df.columns)
    winners = list(summary.df["argmax_F_C"])
    self.assertListEqual(["argmax_F_C"], column_names)
    self.assertListEqual(["foo1"], winners)
def test_summarize_anyif_noby_allnull():
    """``anyif(B, C)`` is expected to yield None when every B value is null."""
    frame = pd.DataFrame()
    frame["B"] = [None, None, None, None, None]
    frame["C"] = [True, False, False, True, True]

    summary = Wrap(frame).summarize("anyif(B, C)")

    print()
    print(summary)

    picked = list(summary.df["anyif_B_C"])
    assert [None] == picked
    column_count = len(summary.df.columns)
    assert 1 == column_count
def test_summarize_make_set():
    """``make_set(A) by G`` collects the distinct A values of each group."""
    frame = pd.DataFrame()
    frame["G"] = [1, 1, 1, 2, 2]
    frame["A"] = [4, 4, 5, 6, 6]

    summary = Wrap(frame).summarize("make_set(A) by G")

    column_names = list(summary.df.columns)
    assert ["G", "make_set_A"] == column_names

    sets_by_group = summary.df["make_set_A"]
    assert {4, 5} == sets_by_group[0]
    assert {6} == sets_by_group[1]
def test_summarize(self):
    """A named ``count()`` grouped by G matches the expected two-row frame."""
    summary = Wrap(create_df()).summarize("x=count()", "G")

    expected = pd.DataFrame({"G": ["G1", "G2"], "x": [3, 2]})
    frames_match = summary.df.equals(expected)
    self.assertTrue(frames_match)
def test_summarize_minif_noby():
    """``minif(B, C)`` without a by-clause yields ``minif_B_C``."""
    frame = pd.DataFrame()
    frame["B"] = [None, 10, -11, 1, 3]
    frame["C"] = [True, False, False, True, True]

    summary = Wrap(frame).summarize("minif(B, C)")

    print()
    print(summary)

    # Rows where C is True carry B values None, 1 and 3; -11 is excluded.
    minimums = list(summary.df["minif_B_C"])
    assert [1] == minimums
    column_count = len(summary.df.columns)
    assert 1 == column_count
def test_summarize_max_count_noby(self):
    """Two named aggregates in one query string yield two named columns."""
    summary = Wrap(create_df()).summarize("M=max(B), C=count()")

    print()
    print(summary.df)

    column_names = list(summary.df.columns)
    self.assertListEqual(["M", "C"], column_names)
    maximums = list(summary.df["M"])
    self.assertListEqual([4], maximums)
    counts = list(summary.df["C"])
    self.assertListEqual([5], counts)
def test_summarize_max(self):
    """``max(F)`` grouped by G yields one maximum per group.

    G1 holds F values 1, 0, 4 and G2 holds 9, 8, so the expected maxima
    are 4 and 9.
    """
    df = create_df()
    df["G"] = ["G1", "G1", "G2", "G1", "G2"]
    df["F"] = [1, 0, 9, 4, 8]
    w = Wrap(df)
    wnew = w.summarize(["max(F)"], "G")
    # Print the summarized frame; the input frame is not what is under test.
    print(wnew.df)
    # Expected value first, to match the argument order of the sibling tests.
    self.assertListEqual([4, 9], list(wnew.df["max_F"]))
    self.assertListEqual(["G", "max_F"], list(wnew.df.columns))
def test_summarize_make_list_if():
    """``make_list_if(A, B) by G`` collects A values only where B is true."""
    frame = pd.DataFrame()
    frame["G"] = [1, 1, 1, 2, 2]
    frame["A"] = [4, 4, 5, 6, 6]
    # The first row is filtered out, so group 1 keeps only 4 and 5.
    frame["B"] = [False, True, True, True, True]

    summary = Wrap(frame).summarize("make_list_if(A, B) by G")

    column_names = list(summary.df.columns)
    assert ["G", "make_list_if_A_B"] == column_names

    lists_by_group = summary.df["make_list_if_A_B"]
    assert [4, 5] == lists_by_group[0]
    assert [6, 6] == lists_by_group[1]
def test_summarize_composit_argument_method_noby():
    """An aggregate wrapped in a scalar function lands in a column named ``Column1``."""
    frame = pd.DataFrame()
    frame["A"] = ["HI", "HI", "HI", "HI"]
    frame["G"] = [1, 1, 1, 0]

    summary = Wrap(frame).summarize("tolower(any(A))")

    print()
    print(summary)

    lowered = list(summary.df["Column1"])
    assert ["hi"] == lowered
    column_count = len(summary.df.columns)
    assert 1 == column_count
def test_summarize_any_2args(self):
    """``any(B, C) by G`` yields one ``any_<col>`` column per argument."""
    frame = create_df()
    frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
    frame["B"] = [None, 10, 20, 30, 40]
    frame["C"] = [0, None, None, 3, 4]

    summary = Wrap(frame).summarize("any(B, C) by G")

    # The expected values come from rows where both B and C are non-null.
    groups = list(summary.df["G"])
    self.assertListEqual(["G1", "G2"], groups)
    picked_b = list(summary.df["any_B"])
    self.assertListEqual([30, 40], picked_b)
    picked_c = list(summary.df["any_C"])
    self.assertListEqual([3, 4], picked_c)
    self.assertEqual(3, len(summary.df.columns))
def test_summarize_argmax(self):
    """``argmax(F, C)`` grouped by G yields one ``argmax_F_C`` value per group."""
    frame = create_df()
    frame["G"] = ["G1", "G1", "G2", "G1", "G2"]
    # The per-group maxima of F are 4 (fourth row, G1) and 9 (third row, G2).
    frame["F"] = [1, 0, 9, 4, 8]

    summary = Wrap(frame).summarize(["argmax(F, C)"], "G")
    print(summary.df)

    winners = list(summary.df["argmax_F_C"])
    self.assertListEqual(winners, ["foo4", "foo3"])
    column_names = list(summary.df.columns)
    self.assertListEqual(["G", "argmax_F_C"], column_names)
def test_summarize_any_star_noby(self):
    """``any(*)`` without a by-clause yields one ``any_<col>`` column per input column."""
    frame = pd.DataFrame()
    frame["B"] = [None, 10, 20, 30, 40]
    frame["C"] = [0, None, None, 3, 4]

    summary = Wrap(frame).summarize("any(*)")

    print()
    print(summary)

    # The expected values 30 and 3 come from the first row where both B and C
    # are non-null.
    picked_b = list(summary.df["any_B"])
    self.assertListEqual([30], picked_b)
    picked_c = list(summary.df["any_C"])
    self.assertListEqual([3], picked_c)
    self.assertEqual(2, len(summary.df.columns))