def test_nan_byvar_transform(self):
        """Identity ``transform`` should add ``val_transform`` equal to ``val``.

        Runs ``pd_utils.groupby_merge`` on ``self.df_nan_byvar`` grouped by
        ``"byvar"`` with a pass-through function and compares the result
        against the fixture plus a ``val_transform`` column copied from
        ``val``.
        """
        # Build the expectation before calling the code under test so it is
        # derived from the untouched fixture.
        fixture = self.df_nan_byvar
        expected = fixture.assign(val_transform=fixture["val"])

        result = pd_utils.groupby_merge(
            fixture, "byvar", "transform", lambda x: x
        )

        assert_frame_equal(expected, result)
    def test_nan_byvar_and_nan_val_transform_numeric(self):
        """A ``+ 1`` transform should keep a non-default index on the output.

        Both the input and the expected frame are copies of
        ``self.df_nan_byvar_and_val`` relabelled with the index
        ``[5, 6, 7, 8]``; the expected frame additionally carries
        ``val_transform == val + 1``.
        """
        custom_labels = [5, 6, 7, 8]

        frame_in = self.df_nan_byvar_and_val.copy()
        frame_in.index = custom_labels

        expected = self.df_nan_byvar_and_val.copy()
        expected.index = custom_labels
        expected["val_transform"] = expected["val"] + 1

        result = pd_utils.groupby_merge(
            frame_in, "byvar", "transform", lambda x: x + 1
        )

        assert_frame_equal(expected, result)
    def test_nan_byvar_sum(self):
        """Grouped ``sum`` on ``self.df_nan_byvar`` should add ``val_sum``.

        In the expected frame the row whose ``byvar`` is NaN has a NaN
        ``val_sum``, while the two ``"b"`` rows share the group total 7.0.
        """
        result = pd_utils.groupby_merge(self.df_nan_byvar, "byvar", "sum")

        expected_columns = ["byvar", "val", "val_sum"]
        expected_rows = [
            ("a", 1, 1.0),
            (nan, 2, nan),
            ("b", 3, 7.0),
            ("b", 4, 7.0),
        ]
        expected = pd.DataFrame(expected_rows, columns=expected_columns)

        assert_frame_equal(expected, result)
    def test_subset_max(self):
        """Grouped ``max`` restricted to ``RET`` should add only ``RET_max``.

        Groups ``self.df`` by ``PERMNO`` and ``byvar`` and expects every row
        of a group to carry that group's largest ``RET`` in ``RET_max``.
        """
        dates = ("1/1/2000", "1/2/2000", "1/3/2000", "1/4/2000")
        # One entry per (PERMNO, byvar) group: its RET values in date order
        # and the maximum expected to be repeated on each of its rows.
        groups = (
            (10516, "a", (1.01, 1.02, 1.03, 1.04), 1.04),
            (10516, "b", (1.05, 1.06, 1.07, 1.08), 1.08),
            (10517, "a", (1.09, 1.10, 1.11, 1.12), 1.12),
        )

        expected_rows = []
        for permno, byvar, rets, ret_max in groups:
            for date, ret in zip(dates, rets):
                expected_rows.append((permno, byvar, date, ret, ret_max))

        expected = pd.DataFrame(
            expected_rows,
            columns=["PERMNO", "byvar", "Date", "RET", "RET_max"],
        )

        result = pd_utils.groupby_merge(
            self.df, ["PERMNO", "byvar"], "max", subset="RET"
        )

        assert_frame_equal(expected, result)
Exemple #5
0
def summary_timing_df(parsed_df):
    """Summarise timing rows per ``function``, largest ``time_sum`` first.

    Applies ``groupby_merge`` twice on the ``'time'`` column grouped by
    ``'function'`` (first ``'sum'``, then ``'mean'``), removes the ``'time'``
    and ``'orig order'`` columns, drops duplicate rows and sorts by
    ``'time_sum'`` in descending order.

    NOTE(review): ``'time_sum'`` is presumably the column added by the
    ``'sum'`` call -- confirm against ``groupby_merge``.
    """
    with_sum = groupby_merge(parsed_df, 'function', 'sum', subset='time')
    with_sum_and_mean = groupby_merge(
        with_sum, 'function', 'mean', subset='time'
    )

    # Without the per-call columns, rows of the same function can collapse
    # into one when duplicates are dropped.
    trimmed = with_sum_and_mean.drop(['time', 'orig order'], axis=1)
    deduplicated = trimmed.drop_duplicates()
    return deduplicated.sort_values('time_sum', ascending=False)