Python tf_headの例、context.grama.tf_head Pythonの例

コード例 #1

0

ファイルを表示

    def test_arrange(self):
        """Check gr.tf_arrange against reference results built with pandas.

        Three comparisons are made:
        1. grouped arrange on "depth" (column given as a string) followed by
           tf_head(5), against a groupby/apply reference;
        2. the same pipeline with the column given as ``X.depth``;
        3. an ungrouped arrange on descending cut and price, against
           ``DataFrame.sort_values``.
        """
        # Reference frame; arrange_apply_helperfunc is defined elsewhere in
        # the test module (presumably sorts by depth and keeps five rows per
        # group -- confirm against its definition).
        df = (
            data.df_diamonds.groupby("cut")
            .apply(arrange_apply_helperfunc)
            .reset_index(drop=True)
        )

        # Column specified by name.
        d = (
            data.df_diamonds
            >> gr.tf_group_by("cut")
            >> gr.tf_arrange("depth", ascending=False)
            >> gr.tf_head(5)
            >> gr.tf_ungroup()
        ).reset_index(drop=True)
        self.assertTrue(df.equals(d))

        # Column specified symbolically.
        d = (
            data.df_diamonds
            >> gr.tf_group_by("cut")
            >> gr.tf_arrange(X.depth, ascending=False)
            >> gr.tf_head(5)
            >> gr.tf_ungroup()
        ).reset_index(drop=True)
        # Use the unittest assertion (not a bare assert) so the check still
        # runs under ``python -O`` and matches the rest of the test.
        self.assertTrue(df.equals(d))

        # Ungrouped, multi-key descending sort via gr.desc.
        df = data.df_diamonds.sort_values(["cut", "price"], ascending=False)
        d = data.df_diamonds >> gr.tf_arrange(gr.desc(X.cut), gr.desc(X.price))
        self.assertTrue(df.equals(d))

コード例 #2

0

ファイルを表示

 def test_sd(self):
     """Check gr.sd inside tf_summarize and tf_mutate, grouped and ungrouped.

     The sample frame is built from tf_head(3) per cut, restricted to the
     ``cut`` and ``x`` columns. Floating-point results are compared with an
     absolute tolerance of 1e-5. The final section uses tf_head(1) per cut
     and expects NaN for every group.
     """
     df = (
         data.df_diamonds
         >> gr.tf_group_by(X.cut)
         >> gr.tf_head(3)
         >> gr.tf_select(X.cut, X.x)
         >> gr.tf_ungroup()
     )
     # straight summarize
     t = df >> gr.tf_summarize(s=gr.sd(X.x))
     df_truth = pd.DataFrame({"s": [0.829091]})
     test_vector = abs(t.s - df_truth.s)
     self.assertTrue(all(test_vector < 0.00001))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(s=gr.sd(X.x))
     df_truth = pd.DataFrame(
         {
             "cut": ["Fair", "Good", "Ideal", "Premium", "Very Good"],
             "s": [1.440417, 0.148436, 0.236925, 0.181934, 0.072342],
         }
     )
     test_vector = abs(t.s - df_truth.s)
     self.assertTrue(all(test_vector < 0.00001))
     # straight mutate
     t = df >> gr.tf_mutate(s=gr.sd(X.x))
     df_truth = df.copy()
     df_truth["s"] = 0.829091
     test_vector = abs(t.s - df_truth.s)
     self.assertTrue(all(test_vector < 0.00001))
     # grouped mutate
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(s=gr.sd(X.x))
     # Expected per-row value is the per-cut sd already used in the grouped
     # summarize check above. Building it from df.cut keeps the expectation
     # independent of the row order that the grouped mutate returns.
     # NOTE(review): assumes df.cut holds labels matching these keys.
     sd_by_cut = {
         "Fair": 1.440417,
         "Good": 0.148436,
         "Ideal": 0.236925,
         "Premium": 0.181934,
         "Very Good": 0.072342,
     }
     s_truth = df.cut.map(sd_by_cut).astype(float)
     # Series subtraction aligns on index labels; any label missing from
     # either side yields NaN, which fails the comparison below.
     test_vector = abs(t.s - s_truth)
     self.assertTrue(all(test_vector < 0.00001))
     # test with single value (var undefined)
     df = (
         data.df_diamonds
         >> gr.tf_group_by(X.cut)
         >> gr.tf_head(1)
         >> gr.tf_select(X.cut, X.x)
     )
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(s=gr.sd(X.x))
     df_truth = pd.DataFrame(
         {
             "cut": ["Fair", "Good", "Ideal", "Premium", "Very Good"],
             "s": [np.nan, np.nan, np.nan, np.nan, np.nan],
         }
     )
     self.assertTrue(t.equals(df_truth))

コード例 #3

0

ファイルを表示

    def test_var(self):
        """Exercise gr.var through tf_summarize and tf_mutate.

        Covers ungrouped summarize, grouped summarize, ungrouped mutate, and
        the one-row-per-group case where every variance is expected to be
        NaN. Floating-point results are compared with an absolute tolerance
        of 1e-5. Grouped mutate is not exercised by this test.
        """
        tol = 0.00001
        cuts = ["Fair", "Good", "Ideal", "Premium", "Very Good"]

        # Three rows per cut, keeping only the columns under test.
        per_cut = data.df_diamonds >> gr.tf_group_by(X.cut) >> gr.tf_head(3)
        sample = per_cut >> gr.tf_select(X.cut, X.x) >> gr.tf_ungroup()

        # Ungrouped summarize: one variance over the whole sample.
        result = sample >> gr.tf_summarize(v=gr.var(X.x))
        expected = pd.DataFrame({"v": [0.687392]})
        self.assertTrue(all(abs(result.v - expected.v) < tol))

        # Grouped summarize: one variance per cut.
        result = (
            sample >> gr.tf_group_by(X.cut) >> gr.tf_summarize(v=gr.var(X.x))
        )
        expected = pd.DataFrame(
            {
                "cut": cuts,
                "v": [2.074800, 0.022033, 0.056133, 0.033100, 0.005233],
            }
        )
        self.assertTrue(all(abs(result.v - expected.v) < tol))

        # Ungrouped mutate: the overall variance on every row.
        result = sample >> gr.tf_mutate(v=gr.var(X.x))
        expected = sample.copy()
        expected["v"] = 0.687392
        self.assertTrue(all(abs(result.v - expected.v) < tol))

        # Single observation per group: expected output is NaN per cut.
        singles = (
            data.df_diamonds
            >> gr.tf_group_by(X.cut)
            >> gr.tf_head(1)
            >> gr.tf_select(X.cut, X.x)
        )
        result = (
            singles >> gr.tf_group_by(X.cut) >> gr.tf_summarize(v=gr.var(X.x))
        )
        expected = pd.DataFrame({"cut": cuts, "v": [np.nan] * 5})
        self.assertTrue(result.equals(expected))

コード例 #4

0

ファイルを表示

    def test_median(self):
        """Exercise gr.median through tf_summarize and tf_mutate.

        Covers ungrouped summarize, grouped summarize, ungrouped mutate, and
        a grouped summarize with two rows per cut (an even count). The
        even-count case is compared with an absolute tolerance of 1e-9; the
        others use exact DataFrame equality. Grouped mutate is not exercised
        by this test.
        """
        cuts = ["Fair", "Good", "Ideal", "Premium", "Very Good"]

        # Three rows per cut, keeping only the columns under test.
        per_cut = data.df_diamonds >> gr.tf_group_by(X.cut) >> gr.tf_head(3)
        sample = per_cut >> gr.tf_select(X.cut, X.x) >> gr.tf_ungroup()

        # Ungrouped summarize.
        expected = pd.DataFrame({"m": [4.05]})
        result = sample >> gr.tf_summarize(m=gr.median(X.x))
        self.assertTrue(result.equals(expected))

        # Grouped summarize.
        expected = pd.DataFrame(
            {"cut": cuts, "m": [6.27, 4.25, 3.95, 3.89, 3.95]}
        )
        result = (
            sample
            >> gr.tf_group_by(X.cut)
            >> gr.tf_summarize(m=gr.median(X.x))
        )
        self.assertTrue(result.equals(expected))

        # Ungrouped mutate: the overall median on every row.
        expected = sample.copy()
        expected["m"] = 4.05
        result = sample >> gr.tf_mutate(m=gr.median(X.x))
        self.assertTrue(result.equals(expected))

        # Even number of rows per group.
        pairs = (
            data.df_diamonds
            >> gr.tf_group_by(X.cut)
            >> gr.tf_head(2)
            >> gr.tf_select(X.cut, X.x)
        )
        result = (
            pairs
            >> gr.tf_group_by(X.cut)
            >> gr.tf_summarize(m=gr.median(X.x))
        )
        expected = pd.DataFrame(
            {"cut": cuts, "m": [5.160, 4.195, 3.940, 4.045, 3.945]}
        )
        deviation = abs(result.m - expected.m)
        self.assertTrue(all(deviation < 0.000000001))

コード例 #5

0

ファイルを表示

 def test_nth(self):
     """Exercise gr.nth through tf_summarize and tf_mutate.

     Uses a ten-row sample (tf_head(10)) of the ``cut`` and ``x`` columns
     and checks: a positive index, index 0 per group, a negative index with
     ``order_by``, an index of 500 (NaN expected on every row), and index
     -2 per group under mutate.
     """
     sample = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(10)

     # Ungrouped summarize, positional index 1.
     expected = pd.DataFrame({"second": [3.89]})
     result = sample >> gr.tf_summarize(second=gr.nth(X.x, 1))
     self.assertTrue(result.equals(expected))

     # Grouped summarize, index 0 within each cut.
     expected = pd.DataFrame(
         {
             "cut": ["Fair", "Good", "Ideal", "Premium", "Very Good"],
             "first": [3.87, 4.05, 3.95, 3.89, 3.94],
         }
     )
     grouped = sample >> gr.tf_group_by(X.cut)
     result = grouped >> gr.tf_summarize(first=gr.nth(X.x, 0))
     self.assertTrue(result.equals(expected))

     # Ungrouped summarize, index -1 after ordering by two keys.
     ordering = [gr.desc(X.cut), gr.desc(X.x)]
     result = sample >> gr.tf_summarize(
         last=gr.nth(X.x, -1, order_by=ordering)
     )
     expected = pd.DataFrame({"last": [3.87]})
     self.assertTrue(expected.equals(result))

     # Ungrouped mutate with an index of 500: NaN expected on every row.
     expected = sample.copy()
     expected["out_of_range"] = np.nan
     result = sample >> gr.tf_mutate(out_of_range=gr.nth(X.x, 500))
     self.assertTrue(result.equals(expected))

     # Grouped mutate, index -2 within each cut; compared in index order.
     grouped = sample >> gr.tf_group_by(X.cut)
     result = grouped >> gr.tf_mutate(penultimate=gr.nth(X.x, -2))
     expected = sample.copy()
     expected["penultimate"] = pd.Series(
         [np.nan, 3.89, 4.05, 3.89, 4.05, 4.07, 4.07, 4.07, np.nan, 4.07]
     )
     self.assertTrue(result.sort_index().equals(expected))

コード例 #6

0

ファイルを表示

 def test_last(self):
     """Check gr.last inside tf_summarize and tf_mutate.

     Uses a five-row sample (tf_head(5)) of the ``cut`` and ``x`` columns
     and covers ungrouped/grouped summarize, summarize with ``order_by``,
     and ungrouped/grouped mutate.
     """
     df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)
     # straight summarize
     t = df >> gr.tf_summarize(l=gr.last(X.x))
     df_truth = pd.DataFrame({"l": [4.34]})
     self.assertTrue(t.equals(df_truth))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(l=gr.last(X.x))
     df_truth = pd.DataFrame({
         "cut": ["Good", "Ideal", "Premium"],
         "l": [4.34, 3.95, 4.20]
     })
     self.assertTrue(t.equals(df_truth))
     # summarize with order_by
     t = df >> gr.tf_summarize(f=gr.last(
         X.x, order_by=[gr.desc(X.cut), gr.desc(X.x)]))
     df_truth = pd.DataFrame({"f": [4.05]})
     # Use the unittest assertion (not a bare assert) so the check still
     # runs under ``python -O`` and matches the rest of the test.
     self.assertTrue(df_truth.equals(t))
     # straight mutate
     t = df >> gr.tf_mutate(l=gr.last(X.x))
     df_truth = df.copy()
     df_truth["l"] = df_truth.x.iloc[4]
     self.assertTrue(t.equals(df_truth))
     # grouped mutate; result is compared in index order
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(l=gr.last(X.x))
     df_truth["l"] = pd.Series([3.95, 4.20, 4.34, 4.20, 4.34])
     self.assertTrue(t.sort_index().equals(df_truth))

コード例 #7

0

ファイルを表示

 def test_first(self):
     """Check gr.first inside tf_summarize and tf_mutate.

     Uses a five-row sample (tf_head(5)) of the ``cut`` and ``x`` columns
     and covers ungrouped/grouped summarize, summarize with ``order_by``,
     and ungrouped/grouped mutate.
     """
     df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)
     # straight summarize
     t = df >> gr.tf_summarize(f=gr.first(X.x))
     df_truth = pd.DataFrame({"f": [3.95]})
     self.assertTrue(t.equals(df_truth))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(f=gr.first(X.x))
     df_truth = pd.DataFrame({
         "cut": ["Good", "Ideal", "Premium"],
         "f": [4.05, 3.95, 3.89]
     })
     self.assertTrue(t.equals(df_truth))
     # summarize with order_by
     t = df >> gr.tf_summarize(f=gr.first(X.x, order_by=gr.desc(X.cut)))
     df_truth = pd.DataFrame({"f": [3.89]})
     # This result was previously built but never checked.
     # NOTE(review): two rows appear to share the top-ranked cut, so the
     # expected 3.89 presumably relies on ties keeping their original
     # order -- confirm against the order_by implementation.
     self.assertTrue(t.equals(df_truth))
     # straight mutate
     t = df >> gr.tf_mutate(f=gr.first(X.x))
     df_truth = df.copy()
     df_truth["f"] = df_truth.x.iloc[0]
     self.assertTrue(t.equals(df_truth))
     # grouped mutate; result is compared in index order
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(f=gr.first(X.x))
     df_truth["f"] = pd.Series([3.95, 3.89, 4.05, 3.89, 4.05])
     self.assertTrue(t.sort_index().equals(df_truth))

コード例 #8

0

ファイルを表示

 def test_n(self):
     """Check gr.n inside tf_summarize and tf_mutate.

     Uses a five-row sample (tf_head(5)) of the ``cut`` and ``x`` columns.
     Both the explicit form ``gr.n(X.x)`` and the implicit form ``gr.n()``
     are exercised, ungrouped and grouped by cut.
     """
     df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)
     # straight summarize
     t = df >> gr.tf_summarize(n=gr.n(X.x))
     df_truth = pd.DataFrame({"n": [5]})
     self.assertTrue(t.equals(df_truth))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(n=gr.n(X.x))
     df_truth = pd.DataFrame({
         "cut": ["Good", "Ideal", "Premium"],
         "n": [2, 1, 2]
     })
     self.assertTrue(t.equals(df_truth))
     # straight mutate
     t = df >> gr.tf_mutate(n=gr.n(X.x))
     df_truth = df.copy()
     df_truth["n"] = 5
     self.assertTrue(t.equals(df_truth))
     # grouped mutate
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(n=gr.n(X.x))
     # One expected count per row of the five-row frame. A longer Series
     # would be silently truncated by index alignment on assignment, which
     # hides mistakes in the expected values.
     df_truth["n"] = pd.Series([1, 2, 2, 2, 2])
     self.assertTrue(t.sort_index().equals(df_truth))
     # Implicit mode summarize
     t = df >> gr.tf_summarize(n=gr.n())
     df_truth = pd.DataFrame({"n": [5]})
     self.assertTrue(t.equals(df_truth))
     # Implicit mode mutate
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(n=gr.n())
     df_truth = df.copy()
     df_truth["n"] = pd.Series([1, 2, 2, 2, 2])
     self.assertTrue(t.sort_index().equals(df_truth))

コード例 #9

0

ファイルを表示

    def test_desc(self):
        """Check gr.desc inside a pipeline and directly on Series.

        First, gr.desc is used as an ``order_by`` key for gr.nth on a
        ten-row sample and the summarized value is verified. Then gr.desc is
        applied to numeric, boolean and string Series and compared with
        ``Series.rank(method="min", ascending=False)``.
        """
        df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(10)
        t = df >> gr.tf_summarize(last=gr.nth(
            X.x, -1, order_by=[gr.desc(X.cut), gr.desc(X.x)]))
        # Verify the pipeline result instead of discarding it; 3.87 is the
        # value test_nth expects for this same sample and expression.
        self.assertTrue(t.equals(pd.DataFrame({"last": [3.87]})))

        series_num = pd.Series([4, 1, 3, 2])
        series_bool = pd.Series([True, False, True, False])
        series_str = pd.Series(["d", "a", "c", "b"])

        num_truth = series_num.rank(method="min", ascending=False)
        bool_truth = series_bool.rank(method="min", ascending=False)
        str_truth = series_str.rank(method="min", ascending=False)

        self.assertTrue(gr.desc(series_num).equals(num_truth))
        self.assertTrue(gr.desc(series_bool).equals(bool_truth))
        self.assertTrue(gr.desc(series_str).equals(str_truth))

コード例 #10

0

ファイルを表示

 def test_max(self):
     """Exercise gr.max through tf_summarize and tf_mutate.

     Uses a five-row sample (tf_head(5)) of the ``cut`` and ``x`` columns
     and covers ungrouped and grouped variants of both verbs. All
     comparisons use exact DataFrame equality.
     """
     sample = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)

     # Ungrouped summarize.
     expected = pd.DataFrame({"m": [4.34]})
     result = sample >> gr.tf_summarize(m=gr.max(X.x))
     self.assertTrue(result.equals(expected))

     # Grouped summarize: one maximum per cut.
     expected = pd.DataFrame(
         {"cut": ["Good", "Ideal", "Premium"], "m": [4.34, 3.95, 4.20]}
     )
     grouped = sample >> gr.tf_group_by(X.cut)
     result = grouped >> gr.tf_summarize(m=gr.max(X.x))
     self.assertTrue(result.equals(expected))

     # Ungrouped mutate: the overall maximum on every row.
     expected = sample.copy()
     expected["m"] = 4.34
     result = sample >> gr.tf_mutate(m=gr.max(X.x))
     self.assertTrue(result.equals(expected))

     # Grouped mutate: per-cut maximum on every row, compared in index
     # order. The expected frame from the previous step is reused with its
     # "m" column overwritten.
     expected["m"] = pd.Series([3.95, 4.20, 4.34, 4.20, 4.34])
     grouped = sample >> gr.tf_group_by(X.cut)
     result = grouped >> gr.tf_mutate(m=gr.max(X.x))
     self.assertTrue(result.sort_index().equals(expected))

コード例 #11

0

ファイルを表示

 def test_IQR(self):
     """Check gr.IQR inside tf_summarize and tf_mutate.

     Uses a five-row sample (tf_head(5)) of the ``cut`` and ``x`` columns.
     Ungrouped results are compared exactly; grouped results are compared
     with an absolute tolerance of 1e-9.
     """
     df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)
     # straight summarize
     t = df >> gr.tf_summarize(i=gr.IQR(X.x))
     df_truth = pd.DataFrame({"i": [0.25]})
     self.assertTrue(t.equals(df_truth))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(i=gr.IQR(X.x))
     df_truth = pd.DataFrame(
         {"cut": ["Good", "Ideal", "Premium"], "i": [0.145, 0.000, 0.155]}
     )
     test_vector = abs(t.i - df_truth.i)
     # Use the unittest assertion (not a bare assert) so the check still
     # runs under ``python -O`` and matches the rest of the test.
     self.assertTrue(all(test_vector < 0.000000001))
     # straight mutate
     t = df >> gr.tf_mutate(i=gr.IQR(X.x))
     df_truth = df.copy()
     df_truth["i"] = 0.25
     self.assertTrue(t.equals(df_truth))
     # grouped mutate; subtraction aligns on index, so row order of t does
     # not matter here
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(i=gr.IQR(X.x))
     df_truth["i"] = pd.Series([0.000, 0.155, 0.145, 0.155, 0.145])
     test_vector = abs(t.i - df_truth.i)
     self.assertTrue(all(test_vector < 0.000000001))