def test_shift(self):
    """Grouped ``shift`` followed by ``trim`` should agree with pandas.

    The pandas reference is ``groupby(...).shift(...)`` followed by
    ``dropna(axis='rows')``. The comparison is run once with a negative
    period (-2) and once with a positive period (3) over the same random
    data.
    """
    import pandas as pd

    # Five random float columns plus a random three-valued grouping key.
    columns = {}
    for i in range(5):
        columns['col_' + str(i)] = np.random.rand(30)
    ds = Dataset(columns)
    ds.keycol = np.random.choice(['a', 'b', 'c'], 30)
    df = pd.DataFrame(ds.asdict())

    for periods in (-2, 3):
        rt_shifted = ds.gb('keycol').shift(periods=periods)
        rt_result = rt_shifted.trim()

        pd_shifted = df.groupby('keycol').shift(periods=periods)
        pd_result = pd_shifted.dropna(axis='rows')

        # Every column returned on the riptable side must match the pandas
        # column of the same name element for element.
        for name, column in rt_result.items():
            matches = column == pd_result[name]
            self.assertTrue(bool(np.all(matches)))
def test_diff(self):
    """Grouped ``rolling_diff`` should agree with pandas ``groupby(...).diff()``.

    For every column in the riptable result this checks that:

    * NaN entries sit at the same positions as in the pandas result, and
    * the non-NaN entries are numerically equal to the pandas values.

    The original test only compared the NaN / not-NaN masks (two views of
    the same information), so the diff values themselves were never
    verified.
    """
    import pandas as pd

    ds = Dataset({'col_' + str(i): np.random.rand(10) for i in range(5)})
    ds.keycol = np.random.choice(['a', 'b', 'c'], 10)
    df = pd.DataFrame(ds.asdict())

    rt_result = ds.gb('keycol').rolling_diff()
    pd_result = df.groupby('keycol').diff()

    for k, v in rt_result.items():
        pdc = pd_result[k]
        pdcnan = isnan(pdc)
        self.assertTrue(bool(np.all(isnan(v) == pdcnan)), msg=f'{v} {pdc}')

        masked_valid_pd = isnotnan(pdc)
        masked_valid_rt = isnotnan(v)
        self.assertTrue(bool(np.all(masked_valid_pd == masked_valid_rt)))

        # The masks are known to agree at this point, so boolean indexing
        # picks the same positions on both sides. Convert to plain ndarrays
        # so the comparison is positional and not index-aligned.
        valid_rt = np.asarray(masked_valid_rt, dtype=bool)
        valid_pd = np.asarray(masked_valid_pd, dtype=bool)
        rt_values = np.asarray(v)[valid_rt]
        pd_values = np.asarray(pdc)[valid_pd]
        self.assertTrue(bool(np.allclose(rt_values, pd_values)), msg=f'{v} {pdc}')