def testSerializeLocalTrain(self):
    """Round-trip tiled XGBoost training graphs through JSON.

    Inside a ``LocalContext`` built from a new session, a training graph is
    built from tensor inputs and from DataFrame/Series inputs; each tiled
    graph is serialized with ``to_json`` and rebuilt with ``DAG.from_json``
    (the rebuilt graph is not inspected, the calls only have to succeed).
    Finally a DMatrix made from the multi-output data/label pair is tiled
    and must consist of exactly one chunk.
    """
    session = new_session()

    def round_trip(train_data):
        # Build the tiled training graph and push it through JSON.
        trained = XGBTrain(dtrain=train_data)()
        tiled_graph = trained.build_graph(tiled=True)
        DAG.from_json(tiled_graph.to_json())

    with LocalContext(session._sess):
        # tensor inputs
        round_trip(ToDMatrix(data=self.X, label=self.y)())

        # DataFrame / Series inputs
        round_trip(ToDMatrix(data=self.X_df, label=self.y_series,
                             output_types=[OutputType.dataframe])())

        # data split into two column chunks
        features = mt.random.rand(1000, 10, chunk_size=(1000, 5))
        features, labels = ToDMatrix(
            data=features, label=self.y, multi_output=True)()
        merged = ToDMatrix(data=features, label=labels)()
        merged = merged.tiles()

        self.assertEqual(len(merged.chunks), 1)
def testPercentileExecution(self):
    """Compare ``percentile`` with ``np.percentile``.

    ``q`` is first given as a NumPy array and executed through
    ``self.executor``; it is then given as a tensor and executed through
    an executor whose storage is a ``LocalContext`` wrapping a mock session.
    """
    data = np.random.rand(20, 10)
    ranks = np.random.RandomState(0).randint(100, size=11)
    a = tensor(data, chunk_size=7)

    # q as ndarray
    actual = self.executor.execute_tensor(percentile(a, ranks), concat=True)[0]
    expected = np.percentile(data, ranks)
    np.testing.assert_array_equal(actual, expected)

    # q as tensor
    rank_tensor = tensor(ranks)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    with context:
        lazy = percentile(a, rank_tensor)
        actual = context_executor.execute_tensors([lazy])[0]
        np.testing.assert_array_equal(actual, expected)
def testHistogramExecution(self):
    """Compare the counts returned by ``histogram`` with ``np.histogram``.

    Covers explicit ranges, explicit bins combined with weights (ndarray
    and tensor) and ``density``, and, inside a ``LocalContext``, integer
    and string bin specifications as well as explicit edges passed as a
    list or as a tensor.
    """
    rng = np.random.RandomState(0)
    data = rng.randint(10, size=(20,))
    a = tensor(data, chunk_size=3)
    weight_values = rng.random(20)
    weight_tensor = tensor(weight_values, chunk_size=4)

    # range provided
    for value_range in [(0, 10), (3, 11), (3, 7)]:
        counts = histogram(a, range=value_range)[0]
        actual = self.executor.execute_tensor(counts)[0]
        wanted = np.histogram(data, range=value_range)[0]
        np.testing.assert_array_equal(actual, wanted)

    # explicit bins with weights; the NumPy reference always gets the
    # ndarray weights, whichever form was handed to ``histogram``
    for wt in (weight_values, weight_tensor):
        for density in (True, False):
            edges = [1, 4, 6, 9]
            counts = histogram(a, bins=edges, weights=wt, density=density)[0]
            actual = self.executor.execute_tensor(counts)[0]
            wanted = np.histogram(
                data, bins=edges, weights=weight_values, density=density)[0]
            np.testing.assert_almost_equal(actual, wanted)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    named_bins = [10, 'stone', 'auto', 'doane', 'fd', 'rice',
                  'scott', 'sqrt', 'sturges']
    with context:
        raw_single = rng.randint(10, size=(1,))
        b = tensor(raw_single)
        raw_empty = rng.randint(10, size=(0,))
        c = tensor(raw_empty)
        subjects = [(a, data), (b, raw_single), (c, raw_empty), (sort(a), data)]
        for t, reference in subjects:
            for density in (True, False):
                for bins in named_bins:
                    counts = histogram(t, bins=bins, density=density)[0]

                    if reference.size > 0:
                        # single-tensor execution is expected to fail
                        # with TilesError for non-empty input
                        with self.assertRaises(TilesError):
                            context_executor.execute_tensor(counts)

                    actual = context_executor.execute_tensors([counts])[0]
                    wanted = np.histogram(
                        reference, bins=bins, density=density)[0]
                    np.testing.assert_array_equal(actual, wanted)

                # explicit edges, as list and as tensor
                for bins in [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]:
                    counts = histogram(t, bins=bins, density=density)[0]
                    actual = context_executor.execute_tensors([counts])[0]
                    wanted = np.histogram(
                        reference, bins=[0, 4, 8], density=density)[0]
                    np.testing.assert_array_equal(actual, wanted)
def setUp(self) -> None:
    """Create the test executor and enter its ``LocalContext``.

    The context wraps a mock session whose ``executor`` property reads
    ``self.executor`` of this test case at access time, so it resolves to
    the executor assigned below. The context is entered here and stored on
    ``self.ctx``; it is not exited in this method.
    """
    test_case = self

    class MockSession:
        @property
        def executor(self):
            return test_case.executor

    context = LocalContext(MockSession())
    self.ctx = context
    self.executor = ExecutorForTest('numpy', storage=context)
    context.__enter__()
def testHistogramBinEdgesExecution(self):
    """Compare ``histogram_bin_edges`` with ``np.histogram_bin_edges``.

    Covers explicit ranges, integer and string bin specifications,
    explicit edges passed as a list or as a tensor, and the default bins
    on a small ``arange`` input (where the declared shape is checked too).
    """
    rng = np.random.RandomState(0)
    data = rng.randint(10, size=(20,))
    a = tensor(data, chunk_size=3)

    # range provided
    for value_range in [(0, 10), (3, 11), (3, 7)]:
        edges = histogram_bin_edges(a, range=value_range)
        actual = self.executor.execute_tensor(edges)[0]
        wanted = np.histogram_bin_edges(data, range=value_range)
        np.testing.assert_array_equal(actual, wanted)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    named_bins = [10, 'stone', 'auto', 'doane', 'fd', 'rice',
                  'scott', 'sqrt', 'sturges']
    with context:
        raw_single = rng.randint(10, size=(1,))
        b = tensor(raw_single)
        raw_empty = rng.randint(10, size=(0,))
        c = tensor(raw_empty)
        subjects = [(a, data), (b, raw_single), (c, raw_empty), (sort(a), data)]
        for t, reference in subjects:
            for bins in named_bins:
                edges = histogram_bin_edges(t, bins=bins)

                if reference.size > 0:
                    # single-tensor execution is expected to fail with
                    # TilesError for non-empty input
                    with self.assertRaises(TilesError):
                        context_executor.execute_tensor(edges)

                actual = context_executor.execute_tensors([edges])[0]
                wanted = np.histogram_bin_edges(reference, bins=bins)
                np.testing.assert_array_equal(actual, wanted)

            # explicit edges, as list and as tensor
            for bins in [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]:
                edges = histogram_bin_edges(t, bins=bins)
                actual = context_executor.execute_tensors([edges])[0]
                wanted = np.histogram_bin_edges(reference, bins=[0, 4, 8])
                np.testing.assert_array_equal(actual, wanted)

        # default bins
        data = np.arange(5)
        a = tensor(data, chunk_size=3)
        edges = histogram_bin_edges(a)
        actual = context_executor.execute_tensors([edges])[0]
        wanted = np.histogram_bin_edges(data)
        self.assertEqual(edges.shape, wanted.shape)
        np.testing.assert_array_equal(actual, wanted)
def _create_test_context(cls, executor=None):
    """Build a ``LocalContext`` plus a new executor stored in it.

    A mock session is instantiated first, so its ``executor`` attribute
    holds the ``executor`` argument. A new ``ExecutorForTest`` using the
    context as storage is then created and written back into the shared
    holder dict.

    Returns:
        tuple: ``(ctx, new_executor)``.
    """
    holder = {'executor': executor}

    class MockSession:
        def __init__(self):
            self.executor = holder['executor']

    session = MockSession()
    ctx = LocalContext(session)
    new_executor = ExecutorForTest('numpy', storage=ctx)
    holder['executor'] = new_executor
    return ctx, new_executor
def testSeriesQuantileExecution(self):
    """Compare ``Series.quantile`` with the pandas result.

    Covers the default scalar ``q``, a list ``q``, a non-default
    interpolation, and ``q`` supplied as a tensor (executed inside a
    ``LocalContext``).
    """
    pd_series = pd.Series(np.random.rand(10), name='a')
    a = Series(pd_series, chunk_size=3)

    def run(tileable):
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    # q = 0.5, scalar
    self.assertEqual(run(a.quantile()), pd_series.quantile())

    # q is a list, without and with an explicit interpolation
    for kwargs in ({}, {'interpolation': 'midpoint'}):
        actual = run(a.quantile([0.3, 0.7], **kwargs))
        wanted = pd_series.quantile([0.3, 0.7], **kwargs)
        pd.testing.assert_series_equal(actual, wanted)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    with context:
        # q is a tensor
        q_tensor = tensor([0.3, 0.7])
        lazy = a.quantile(q_tensor)
        actual = context_executor.execute_dataframes([lazy])[0]
        wanted = pd_series.quantile([0.3, 0.7])
        pd.testing.assert_series_equal(actual, wanted)
def testStoreHDF5Execution(self):
    """Store a tensor with ``tohdf5`` and read it back through h5py.

    The target is given in turn as a file name, an open ``h5py.File`` and
    an open h5py dataset. After each store the dataset
    ``test_group/test_dataset`` is read back and compared with the source
    array. Invalid targets (an arbitrary object, or a file/file name
    without group and dataset) must raise.
    """
    raw = np.random.RandomState(0).rand(10, 20)

    group_name = 'test_group'
    dataset_name = 'test_dataset'

    t1 = tensor(raw, chunk_size=20)  # covers the whole array in one chunk
    t2 = tensor(raw, chunk_size=9)   # split into several chunks

    # an arbitrary object is not an accepted target
    with self.assertRaises(TypeError):
        tohdf5(object(), t2)

    this = self

    class MockSession:
        def __init__(self):
            self.executor = this.executor

    ctx = LocalContext(MockSession())
    executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(d, 'test_store_{}.hdf5'.format(int(time.time())))

            # test 1 chunk
            r = tohdf5(filename, t1, group=group_name, dataset=dataset_name)

            executor.execute_tensor(r)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                np.testing.assert_array_equal(result, raw)

            # test filename
            r = tohdf5(filename, t2, group=group_name, dataset=dataset_name)

            executor.execute_tensor(r)

            # the second input of the first store chunk must be a
            # SuccessorsExclusive op that has no inputs of its own
            rt = get_tiled(r)
            self.assertEqual(type(rt.chunks[0].inputs[1].op).__name__, 'SuccessorsExclusive')
            self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                np.testing.assert_array_equal(result, raw)

            # a file name without group/dataset is rejected
            with self.assertRaises(ValueError):
                tohdf5(filename, t2)

            with h5py.File(filename, 'r') as f:
                # test file
                r = tohdf5(f, t2, group=group_name, dataset=dataset_name)

            # NOTE(review): the store is executed after the read-only handle
            # is closed, presumably so the file can be reopened for writing
            # - confirm against the uncollapsed source.
            executor.execute_tensor(r)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                np.testing.assert_array_equal(result, raw)

            # an open file without group/dataset is rejected as well
            with self.assertRaises(ValueError):
                with h5py.File(filename, 'r') as f:
                    tohdf5(f, t2)

            with h5py.File(filename, 'r') as f:
                # test dataset
                ds = f['{}/{}'.format(group_name, dataset_name)]
                # a dataset target needs no group/dataset arguments
                r = tohdf5(ds, t2)

            # NOTE(review): as above, assumed to run once the handle is closed.
            executor.execute_tensor(r)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name, dataset_name)])
                np.testing.assert_array_equal(result, raw)
def testDataFrameQuantileExecution(self):
    """Compare ``DataFrame.quantile`` with the pandas result.

    Covers the default ``q`` on both axes, a list ``q`` on both axes, a
    non-default interpolation, ``q`` supplied as a tensor (executed inside
    a ``LocalContext``) and ``numeric_only=False`` on a frame that holds a
    timestamp column.
    """
    frame = pd.DataFrame(
        {
            'a': np.random.rand(10),
            'b': np.random.randint(1000, size=10),
            'c': np.random.rand(10),
            'd': [np.random.bytes(10) for _ in range(10)],
            'e': [pd.Timestamp('201{}'.format(i)) for i in range(10)],
            'f': [pd.Timedelta('{} days'.format(i)) for i in range(10)]
        },
        index=pd.RangeIndex(1, 11))
    df = DataFrame(frame, chunk_size=3)

    def run(tileable):
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    series_equal = pd.testing.assert_series_equal
    frame_equal = pd.testing.assert_frame_equal

    # (positional args, keyword args, comparison) for each scenario
    scenarios = [
        ((), {}, series_equal),                     # q = 0.5, axis = 0
        ((), {'axis': 1}, series_equal),            # q = 0.5, axis = 1
        (([0.3, 0.7],), {}, frame_equal),           # list q, axis = 0
        (([0.3, 0.7],), {'axis': 1}, frame_equal),  # list q, axis = 1
        (([0.3, 0.7],), {'interpolation': 'midpoint'}, frame_equal),
    ]
    for args, kwargs, check in scenarios:
        actual = run(df.quantile(*args, **kwargs))
        wanted = frame.quantile(*args, **kwargs)
        check(actual, wanted)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    with context:
        # q is a tensor
        q_tensor = tensor([0.3, 0.7])
        lazy = df.quantile(q_tensor)
        actual = context_executor.execute_dataframes([lazy])[0]
        wanted = frame.quantile([0.3, 0.7])
        frame_equal(actual, wanted)

    # test numeric_only
    frame2 = pd.DataFrame(
        {
            'a': np.random.rand(10),
            'b': np.random.randint(1000, size=10),
            'c': np.random.rand(10),
            'd': [pd.Timestamp('201{}'.format(i)) for i in range(10)],
        },
        index=pd.RangeIndex(1, 11))
    df2 = DataFrame(frame2, chunk_size=3)

    actual = run(df2.quantile([0.3, 0.7], numeric_only=False))
    wanted = frame2.quantile([0.3, 0.7], numeric_only=False)
    frame_equal(actual, wanted)

    actual = run(df2.quantile(numeric_only=False))
    wanted = frame2.quantile(numeric_only=False)
    series_equal(actual, wanted)
def testQuantileExecution(self):
    """Compare ``quantile`` with ``np.quantile``.

    Clean and NaN-containing inputs are checked for 1-d and 2-d data,
    single-chunk and multi-chunk layouts, a scalar and a vector ``q``,
    every entry of ``INTERPOLATION_TYPES`` and both ``keepdims`` values
    (plus ``axis`` in ``None, 0, 1`` for 2-d data). The ``out`` argument
    and a tensor-valued ``q`` (including an out-of-range value, which must
    raise ``ValueError``) are covered afterwards.
    """

    def with_nans(values):
        # Copy ``values`` and overwrite three seeded flat positions
        # (which may repeat) with NaN.
        damaged = values.copy()
        positions = np.random.RandomState(0).randint(values.size, size=3)
        damaged.flat[positions] = np.nan
        return damaged

    def check(raw, chunk_size, axis_options):
        # Run every q/interpolation/keepdims/axis combination on both the
        # clean and the NaN-containing data, chunked identically.
        raw_nan = with_nans(raw)
        subjects = [
            (raw, tensor(raw, chunk_size=chunk_size)),
            (raw_nan, tensor(raw_nan, chunk_size=chunk_size)),
        ]
        qs = [np.random.RandomState(0).rand(),
              np.random.RandomState(0).rand(5)]
        for q in qs:
            for interpolation in INTERPOLATION_TYPES:
                for keepdims in [True, False]:
                    for axis_kw in axis_options:
                        for data, t in subjects:
                            r = quantile(t, q, interpolation=interpolation,
                                         keepdims=keepdims, **axis_kw)
                            result = self.executor.execute_tensor(
                                r, concat=True)[0]
                            expected = np.quantile(
                                data, q, interpolation=interpolation,
                                keepdims=keepdims, **axis_kw)
                            np.testing.assert_array_equal(result, expected)

    # 1-d data never passes ``axis``; 2-d data tries every axis
    no_axis = [{}]
    every_axis = [{'axis': axis} for axis in (None, 0, 1)]

    # test 1 chunk, 1-d
    check(np.random.rand(20), 20, no_axis)
    # test 1 chunk, 2-d
    check(np.random.rand(20, 10), 20, every_axis)
    # test multi chunks, 1-d; the NaN tensor is now chunked like the clean
    # one (it used chunk_size=20, i.e. a single chunk, before)
    check(np.random.rand(20), 3, no_axis)
    # test multi chunk, 2-d
    check(np.random.rand(20, 10), (3, 4), every_axis)

    # test out, 1 chunk and multi chunks; ``raw`` and ``a`` of the last
    # (multi-chunk) round are reused below
    for chunk_size in (20, 3):
        raw = np.random.rand(20)
        q = np.random.rand(11)
        a = tensor(raw, chunk_size=chunk_size)
        out = empty((5, 11))
        quantile(a, q, out=out)
        result = self.executor.execute_tensor(out, concat=True)[0]
        expected = np.quantile(raw, q, out=np.empty((5, 11)))
        np.testing.assert_array_equal(result, expected)

    # test q which is a tensor
    q_raw = np.random.RandomState(0).rand(5)
    q = tensor(q_raw, chunk_size=3)

    this = self

    class MockSession:
        def __init__(self):
            self.executor = this.executor

    ctx = LocalContext(MockSession())
    executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        r = quantile(a, q, axis=None)

        result = executor.execute_tensors([r])[0]
        expected = np.quantile(raw, q_raw, axis=None)

        np.testing.assert_array_equal(result, expected)

        with self.assertRaises(ValueError):
            # 1.1 lies outside [0, 1]
            q[0] = 1.1
            r = quantile(a, q, axis=None)
            # pass a list, like every other execute_tensors call here
            _ = executor.execute_tensors([r])[0]
def testCutExecution(self):
    """Compare ``cut`` with ``pd.cut``.

    Inputs are a Series, a tensor and a raw ndarray; bins are a list, a
    tensor, an interval index (pandas and tileable) or an integer. The
    ``labels``, ``retbins``, ``right``, ``include_lowest`` and
    ``duplicates`` options are exercised, and input containing ``inf``
    with integer bins must raise ``ValueError``.
    """
    rng = np.random.RandomState(0)
    values = rng.random(15) * 1000
    pd_series = pd.Series(values, index=['i{}'.format(i) for i in range(15)])
    bins = [10, 100, 500]
    intervals = pd.interval_range(10, 500, 3)
    labels = ['a', 'b']

    t = tensor(values, chunk_size=4)
    series = from_pandas_series(pd_series, chunk_size=4)
    interval_index = from_pandas_index(intervals, chunk_size=2)

    def run_df(tileable):
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    def run_tensor(tileable):
        return self.executor.execute_tensor(tileable, concat=True)[0]

    def assert_items_equal(actual, wanted):
        # element-wise comparison of two iterables
        for got, want in zip(actual, wanted):
            np.testing.assert_equal(got, want)

    # cut on Series
    pd.testing.assert_series_equal(run_df(cut(series, bins)),
                                   pd.cut(pd_series, bins))

    binned, edges = cut(series, bins, retbins=True)
    binned_actual = run_df(binned)
    edges_actual = run_tensor(edges)
    binned_wanted, edges_wanted = pd.cut(pd_series, bins, retbins=True)
    pd.testing.assert_series_equal(binned_actual, binned_wanted)
    np.testing.assert_array_equal(edges_actual, edges_wanted)

    # cut on tensor
    actual = run_df(cut(t, bins))
    wanted = pd.cut(values, bins)
    self.assertEqual(len(actual), len(wanted))
    assert_items_equal(actual, wanted)

    # one chunk
    binned = cut(pd_series, tensor(bins, chunk_size=2),
                 right=False, include_lowest=True)
    pd.testing.assert_series_equal(
        run_df(binned),
        pd.cut(pd_series, bins, right=False, include_lowest=True))

    # test labels
    actual = run_df(cut(t, bins, labels=labels))
    wanted = pd.cut(values, bins, labels=labels)
    self.assertEqual(len(actual), len(wanted))
    assert_items_equal(actual, wanted)

    actual = run_tensor(cut(t, bins, labels=False))
    wanted = pd.cut(values, bins, labels=False)
    np.testing.assert_array_equal(actual, wanted)

    # test labels which is tensor
    label_tensor = tensor(['a', 'b'], chunk_size=1)
    actual = run_df(cut(values, bins, labels=label_tensor, include_lowest=True))
    wanted = pd.cut(values, bins, labels=labels, include_lowest=True)
    self.assertEqual(len(actual), len(wanted))
    assert_items_equal(actual, wanted)

    # test labels=False
    binned, edges = cut(values, intervals, labels=False, retbins=True)
    binned_actual = self.executor.execute_tileable(binned, concat=True)[0]
    edges_actual = self.executor.execute_tileable(edges, concat=True)[0]
    binned_wanted, edges_wanted = pd.cut(
        values, intervals, labels=False, retbins=True)
    assert_items_equal(binned_actual, binned_wanted)
    pd.testing.assert_index_equal(edges_actual, edges_wanted)

    # test bins which is md.IntervalIndex
    binned, edges = cut(series, interval_index,
                        labels=tensor(labels, chunk_size=1), retbins=True)
    binned_actual = run_df(binned)
    edges_actual = run_df(edges)
    binned_wanted, edges_wanted = pd.cut(
        pd_series, intervals, labels=labels, retbins=True)
    pd.testing.assert_series_equal(binned_actual, binned_wanted)
    pd.testing.assert_index_equal(edges_actual, edges_wanted)

    # test duplicates
    dup_bins = [0, 2, 4, 6, 10, 10]
    binned, edges = cut(pd_series, dup_bins, labels=False, retbins=True,
                        right=False, duplicates='drop')
    binned_actual = run_df(binned)
    edges_actual = run_tensor(edges)
    binned_wanted, edges_wanted = pd.cut(
        pd_series, dup_bins, labels=False, retbins=True,
        right=False, duplicates='drop')
    pd.testing.assert_series_equal(binned_actual, binned_wanted)
    np.testing.assert_array_equal(edges_actual, edges_wanted)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    with context:
        # test integer bins
        actual = context_executor.execute_dataframes([cut(series, 3)])[0]
        pd.testing.assert_series_equal(actual, pd.cut(pd_series, 3))

        binned, edges = cut(series, 3, right=False, retbins=True)
        binned_actual, edges_actual = \
            context_executor.execute_dataframes([binned, edges])
        binned_wanted, edges_wanted = pd.cut(
            pd_series, 3, right=False, retbins=True)
        pd.testing.assert_series_equal(binned_actual, binned_wanted)
        np.testing.assert_array_equal(edges_actual, edges_wanted)

        # test min max same
        constant = pd.Series([1.1] * 15)
        actual = context_executor.execute_dataframes([cut(constant, 3)])[0]
        pd.testing.assert_series_equal(actual, pd.cut(constant, 3))

        # test inf exist
        with_inf = constant.copy()
        with_inf[-1] = np.inf
        with self.assertRaises(ValueError):
            context_executor.execute_dataframes([cut(with_inf, 3)])
def testRollingAggExecution(self):
    """Compare rolling aggregations with the pandas results.

    DataFrame and Series inputs are checked with string, list and dict
    aggregation specs, several windows and ``center`` values,
    ``min_periods``/``win_type``, column selection, ``axis=1``, an offset
    window on a datetime index, filtered inputs executed inside a
    ``LocalContext``, and the named rolling methods.
    """
    frame = pd.DataFrame({
        'a': np.random.randint(100, size=(10, )),
        'b': np.random.rand(10),
        'c': np.random.randint(100, size=(10, )),
        'd': ['c' * i for i in np.random.randint(4, size=10)]
    })
    frame.iloc[1, ::4] = np.nan
    column = frame.iloc[:, 1]

    one_chunk_df = md.DataFrame(frame, chunk_size=10)   # 1 chunk
    multi_chunk_df = md.DataFrame(frame, chunk_size=3)  # multiple chunks on each axis
    frame_funcs = ['min', ['max', 'mean'], {'c': ['std'], 'b': ['count', 'min']}]

    def run(tileable):
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    frame_equal = pd.testing.assert_frame_equal
    series_equal = pd.testing.assert_series_equal

    # test 1 chunk
    lazy = one_chunk_df.rolling(3).agg(frame_funcs[2])
    frame_equal(run(lazy), frame.rolling(3).agg(frame_funcs[2]))

    for window in [2, 5]:
        for center in [True, False]:
            for func in frame_funcs:
                lazy = multi_chunk_df.rolling(window, center=center).agg(func)
                wanted = frame.rolling(window, center=center).agg(func)
                frame_equal(run(lazy), wanted)

    # test min_periods and win_type
    lazy = multi_chunk_df.rolling(3, min_periods=1, win_type='triang').agg('sum')
    wanted = frame.rolling(3, min_periods=1, win_type='triang').agg('sum')
    frame_equal(run(lazy), wanted)

    # test rolling getitem, series
    lazy = multi_chunk_df.rolling(3)['b'].agg('sum')
    series_equal(run(lazy), frame.rolling(3)['b'].agg('sum'))

    # test rolling getitem, dataframe
    lazy = multi_chunk_df.rolling(3)['c', 'b'].agg('sum')
    frame_equal(run(lazy), frame.rolling(3)['c', 'b'].agg('sum'))

    # test axis=1
    lazy = multi_chunk_df.rolling(3, axis=1).agg('sum')
    actual = self.executor.execute_dataframe(
        lazy, concat=True, check_nsplits=False)[0]
    frame_equal(actual, frame.rolling(3, axis=1).agg('sum'))

    # test window which is offset
    dated = frame.copy()
    dated.reset_index(inplace=True, drop=True)
    dated.index = pd.date_range('2020-2-25', periods=10)

    dated_df = md.DataFrame(dated, chunk_size=3)
    for func in frame_funcs:
        lazy = dated_df.rolling('2d').agg(func)
        frame_equal(run(lazy), dated.rolling('2d').agg(func))

    series_variants = [md.Series(column, chunk_size=10),
                       md.Series(column, chunk_size=4)]
    series_funcs = ['min', ['max', 'mean'], {'c': 'std', 'b': 'count'}]

    for variant in series_variants:
        for window in [2, 3, 5]:
            for center in [True, False]:
                for func in series_funcs:
                    lazy = variant.rolling(window, center=center).agg(func)
                    actual = run(lazy)
                    wanted = column.rolling(window, center=center).agg(func)
                    # the pandas result type decides which comparison applies
                    if isinstance(wanted, pd.Series):
                        series_equal(actual, wanted)
                    else:
                        frame_equal(actual, wanted)

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    context = LocalContext(MockSession())
    context_executor = ExecutorForTest('numpy', storage=context)
    with context:
        # filtered DataFrame
        filtered_df = md.DataFrame(frame, chunk_size=3)
        filtered_df = filtered_df[filtered_df.a > 0.5]
        lazy = filtered_df.rolling(3).agg('max')

        actual = context_executor.execute_dataframes([lazy])[0]
        wanted = frame[frame.a > 0.5].rolling(3).agg('max')
        frame_equal(actual, wanted)

        # filtered Series
        filtered_series = md.Series(column, chunk_size=3)
        filtered_series = filtered_series[filtered_series > 0.5]
        lazy = filtered_series.rolling(3).agg('max')

        actual = context_executor.execute_dataframes([lazy])[0]
        wanted = column[column > 0.5].rolling(3).agg('max')
        series_equal(actual, wanted)

    # test agg functions
    plain_df = md.DataFrame(frame, chunk_size=3)
    for func in ['count', 'sum', 'mean', 'median', 'min', 'max', 'skew', 'kurt']:
        lazy = getattr(plain_df.rolling(4), func)()
        frame_equal(run(lazy), getattr(frame.rolling(4), func)())

    for func in ['std', 'var']:
        lazy = getattr(plain_df.rolling(4), func)(ddof=0)
        frame_equal(run(lazy), getattr(frame.rolling(4), func)(ddof=0))