def test_tf_applier_pandas_generator(self):
    """Check the batches yielded by ``PandasTFApplier.apply_generator``.

    Applies the ``square`` TF under a ``RandomPolicy`` that drops the
    original rows, then verifies each yielded batch (values, index and
    the ``int64`` dtype of ``num``) and that the input frame still
    equals a freshly built fixture afterwards.
    """
    df = self._get_x_df_with_str()
    policy = RandomPolicy(
        1, sequence_length=2, n_per_original=2, keep_original=False
    )
    applier = PandasTFApplier([square], policy)
    batches = list(applier.apply_generator(df, batch_size=2))
    df_expected = [
        pd.DataFrame(
            {"num": [1, 1, 16, 16], "strs": ["x", "x", "y", "y"]},
            index=[0, 0, 1, 1],
        ),
        pd.DataFrame({"num": [81, 81], "strs": ["z", "z"]}, index=[2, 2]),
    ]
    # zip() silently stops at the shorter input, so a generator yielding
    # too few (or no) batches would otherwise let this test pass vacuously.
    self.assertEqual(len(batches), len(df_expected))
    for df_batch, df_batch_expected in zip(batches, df_expected):
        self.assertEqual(df_batch.num.dtype, "int64")
        pd.testing.assert_frame_equal(df_batch, df_batch_expected)
    # The applier must not have altered the caller's frame.
    pd.testing.assert_frame_equal(df, self._get_x_df_with_str())
def test_tf_applier_pandas_modify_in_place_generator(self):
    """Check ``apply_generator`` batches for the ``modify_in_place`` TF.

    Uses an ``ApplyOnePolicy`` that keeps the original rows, compares
    every yielded batch against slices of ``DATA_IN_PLACE_EXPECTED``,
    and verifies the input frame still equals a freshly built fixture
    afterwards.
    """
    df = self._get_x_df_dict()
    policy = ApplyOnePolicy(n_per_original=2, keep_original=True)
    applier = PandasTFApplier([modify_in_place], policy)
    batches = list(applier.apply_generator(df, batch_size=2))
    idx = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    df_expected = [
        make_df(get_data_dict(DATA_IN_PLACE_EXPECTED[:6]), idx[:6], key="d"),
        make_df(get_data_dict(DATA_IN_PLACE_EXPECTED[6:]), idx[6:], key="d"),
    ]
    # zip() silently stops at the shorter input, so a generator yielding
    # too few (or no) batches would otherwise let this test pass vacuously.
    self.assertEqual(len(batches), len(df_expected))
    for df_batch, df_batch_expected in zip(batches, df_expected):
        pd.testing.assert_frame_equal(df_batch, df_batch_expected)
    # The applier must not have altered the caller's frame.
    pd.testing.assert_frame_equal(df, self._get_x_df_dict())
def test_tf_applier_returns_none_generator(self):
    """Check ``apply_generator`` batches for the ``square_returns_none`` TF.

    Uses a ``RandomPolicy`` that keeps the original rows, compares every
    yielded batch against the expected frames, and verifies the input
    frame still equals a freshly built fixture afterwards.
    """
    df = self._get_x_df()
    policy = RandomPolicy(
        1, sequence_length=2, n_per_original=2, keep_original=True
    )
    applier = PandasTFApplier([square_returns_none], policy)
    batches = list(applier.apply_generator(df, batch_size=2))
    df_expected = [
        make_df([1, 1, 1, 2], [0, 0, 0, 1]),
        make_df([3, 81, 81], [2, 2, 2]),
    ]
    # zip() silently stops at the shorter input, so a generator yielding
    # too few (or no) batches would otherwise let this test pass vacuously.
    self.assertEqual(len(batches), len(df_expected))
    for df_batch, df_batch_expected in zip(batches, df_expected):
        pd.testing.assert_frame_equal(df_batch, df_batch_expected)
    # The applier must not have altered the caller's frame.
    pd.testing.assert_frame_equal(df, self._get_x_df())