def test_window_with_mlb():
    """Check a rows-with-max-lookback trailing window against pandas.

    A seven-day frame of random values is registered with ``Backend`` and a
    trailing sum of column ``a`` (at most 5 rows, 10 day lookback, ordered
    by ``time`` and grouped by ``b``) is compared with an equivalent pandas
    rolling computation.  A second spec whose lookback is the plain integer
    ``10`` must raise ``IbisInputError``.
    """
    dates = pd.date_range('20170501', '20170507')
    values = np.random.randn(len(dates), 3)
    frame = pd.DataFrame(values, columns=list('abc'), index=dates)
    df = frame.rename_axis('time').reset_index(drop=False)

    client = Backend().connect({'df': df})
    t = client.table('df')

    interval_lookback = rows_with_max_lookback(5, ibis.interval(days=10))

    def grouped_trailing_sum(tbl):
        # Built lazily, when ``mutate`` invokes the callable.
        window = ibis.trailing_window(
            interval_lookback, order_by='time', group_by='b'
        )
        return tbl.a.sum().over(window)

    result = t.mutate(sum=grouped_trailing_sum).execute()

    # Reference result: 10-day rolling window per group, keeping only the
    # last five rows of every window before summing.
    by_time = df.set_index('time')
    rolled = by_time.groupby(['b'])['a'].rolling('10d', closed='both')
    last_five_sum = rolled.apply(lambda s: s.iloc[-5:].sum(), raw=False)
    gb_df = last_five_sum.sort_index(level=['time']).reset_index(drop=True)
    expected = by_time.reset_index(drop=False).assign(sum=gb_df)
    tm.assert_frame_equal(result, expected)

    # NOTE(review): presumably rejected because max_lookback is an integer
    # rather than an interval -- confirm against the window validation code.
    integer_lookback = rows_with_max_lookback(5, 10)

    def ungrouped_trailing_sum(tbl):
        window = ibis.trailing_window(integer_lookback, order_by='time')
        return tbl.a.sum().over(window)

    with pytest.raises(com.IbisInputError):
        t.mutate(sum=ungrouped_trailing_sum)
def test_window_equals(alltypes):
    """Verify ``equals`` on row, range and max-lookback trailing windows.

    For each flavour, two windows built from the same arguments must compare
    equal, and a window differing in one argument must not.
    """
    t = alltypes

    # Each factory builds a brand-new window (and fresh column expressions)
    # on every call, so equality is structural rather than by identity.
    def rows_window(order_col):
        return ibis.window(
            preceding=1, following=2, group_by=t.a, order_by=order_col
        )

    def hours_window(hours):
        return ibis.range_window(
            preceding=ibis.interval(hours=hours), group_by=t.d
        )

    def lookback_window(max_lookback):
        return ibis.trailing_window(
            rows_with_max_lookback(3, max_lookback),
            group_by=t.a,
            order_by=t.b,
        )

    # Row-based windows: differ by ordering column.
    rows_base = rows_window(t.b)
    rows_twin = rows_window(t.b)
    assert rows_base.equals(rows_twin)

    rows_other_order = rows_window(t.c)
    assert not rows_base.equals(rows_other_order)

    # Range windows: differ by interval length.
    range_base = hours_window(3)
    range_twin = hours_window(3)
    assert range_base.equals(range_twin)

    range_shorter = hours_window(1)
    assert not range_base.equals(range_shorter)

    # Rows-with-max-lookback windows: differ by lookback unit.
    mlb_base = lookback_window(ibis.interval(days=5))
    mlb_twin = lookback_window(ibis.interval(days=5))
    assert mlb_base.equals(mlb_twin)

    mlb_months = lookback_window(ibis.interval(months=5))
    assert not mlb_base.equals(mlb_months)
def test_combine_window_with_max_lookback():
    """Combine two max-lookback trailing windows and check the result.

    Combining a (3 rows, 5 days) window with a (5 rows, 7 days) window is
    expected to equal a (3 rows, 5 days) trailing window.
    """
    first = ibis.trailing_window(
        rows_with_max_lookback(3, ibis.interval(days=5))
    )
    second = ibis.trailing_window(
        rows_with_max_lookback(5, ibis.interval(days=7))
    )
    combined = first.combine(second)

    expected_spec = rows_with_max_lookback(3, ibis.interval(days=5))
    assert_equal(combined, ibis.trailing_window(expected_spec))
def test_determine_how():
    """Exercise ``_determine_how`` with accepted and rejected inputs.

    Tuples, integers and rows-with-max-lookback specs must map to ``'rows'``,
    interval expressions to ``'range'``; floats, strings, dicts, lists and a
    rows-with-max-lookback spec whose row count is an interval must raise
    ``TypeError``.
    """
    accepted = [
        ((None, 5), 'rows'),
        ((3, 1), 'rows'),
        (5, 'rows'),
        (np.int64(7), 'rows'),
        (ibis.interval(days=3), 'range'),
        (ibis.interval(months=5) + ibis.interval(days=10), 'range'),
        (rows_with_max_lookback(3, ibis.interval(months=3)), 'rows'),
        (rows_with_max_lookback(3, pd.Timedelta(days=3)), 'rows'),
        (
            rows_with_max_lookback(np.int64(7), ibis.interval(months=3)),
            'rows',
        ),
    ]
    for preceding, expected_how in accepted:
        how = _determine_how(preceding)
        assert how == expected_how

    for rejected in (
        8.9,
        'invalid preceding',
        {'rows': 1, 'max_lookback': 2},
    ):
        with pytest.raises(TypeError):
            _determine_how(rejected)

    # The spec is built inside the context manager so that a TypeError from
    # either the constructor or ``_determine_how`` satisfies the check.
    with pytest.raises(TypeError):
        _determine_how(
            rows_with_max_lookback(
                ibis.interval(days=3), ibis.interval(months=1)
            )
        )

    with pytest.raises(TypeError):
        _determine_how([3, 5])
def test_window_rows_with_max_lookback(con):
    """Compiling a max-lookback window with ``ImpalaCompiler`` must fail.

    A sum over a trailing window of 3 rows with a 3 day lookback, ordered by
    column ``i``, is expected to raise ``NotImplementedError`` in ``to_sql``.
    """
    table = con.table('alltypes')
    window = ibis.trailing_window(
        rows_with_max_lookback(3, ibis.interval(days=3)),
        order_by=table.i,
    )
    windowed_sum = table.a.sum().over(window)

    with pytest.raises(NotImplementedError):
        ImpalaCompiler.to_sql(windowed_sum)
def test_rolling_window_with_mlb(alltypes):
    """Executing a sum over a max-lookback trailing window must fail.

    The window keeps 3 rows with a 5 day lookback ordered by
    ``timestamp_col``; ``execute`` is expected to raise
    ``NotImplementedError``.
    """
    lookback_spec = rows_with_max_lookback(3, ibis.interval(days=5))
    trailing = ibis.trailing_window(
        preceding=lookback_spec, order_by=alltypes.timestamp_col
    )
    rolling_total = alltypes['double_col'].sum().over(trailing)

    with pytest.raises(NotImplementedError):
        rolling_total.execute()
def test_replace_window(alltypes):
    """Check that ``group_by`` / ``order_by`` on a window extend its keys.

    Calling either method on an existing window must produce a window equal
    to one constructed directly with the extra key appended; the same holds
    for ``group_by`` on a rows-with-max-lookback trailing window.
    """
    t = alltypes
    base = ibis.window(preceding=5, following=1, group_by=t.a, order_by=t.b)

    # Adding a grouping key appends it to the existing one.
    regrouped = base.group_by(t.c)
    expected_regrouped = ibis.window(
        preceding=5, following=1, group_by=[t.a, t.c], order_by=t.b
    )
    assert_equal(regrouped, expected_regrouped)

    # Adding an ordering key appends it to the existing one.
    reordered = base.order_by(t.d)
    expected_reordered = ibis.window(
        preceding=5, following=1, group_by=t.a, order_by=[t.b, t.d]
    )
    assert_equal(reordered, expected_reordered)

    # Grouping a max-lookback trailing window keeps its lookback spec.
    plain_lookback = ibis.trailing_window(
        rows_with_max_lookback(3, ibis.interval(months=3))
    )
    grouped_lookback = plain_lookback.group_by(t.a)
    expected_grouped_lookback = ibis.trailing_window(
        rows_with_max_lookback(3, ibis.interval(months=3)), group_by=t.a
    )
    assert_equal(grouped_lookback, expected_grouped_lookback)
def test_max_rows_with_lookback_validate(alltypes):
    """Validate the ``order_by`` requirements of a max-lookback window.

    Ordering by ``t.i`` alone is accepted.  No ordering, ordering by ``t.a``,
    and ordering by both ``t.i`` and ``t.a`` must each raise
    ``IbisInputError`` when the window is applied.
    """
    t = alltypes
    mlb = rows_with_max_lookback(3, ibis.interval(days=5))

    # NOTE(review): presumably ``i`` is the only timestamp-typed column used
    # here -- confirm against the ``alltypes`` fixture schema.
    accepted_window = ibis.trailing_window(mlb, order_by=t.i)
    t.f.lag().over(accepted_window)

    rejected_orderings = (
        {},
        {'order_by': t.a},
        {'order_by': [t.i, t.a]},
    )
    for window_kwargs in rejected_orderings:
        # The window itself is built outside the context manager; only
        # applying it to the expression is expected to raise.
        rejected_window = ibis.trailing_window(mlb, **window_kwargs)
        with pytest.raises(com.IbisInputError):
            t.f.lag().over(rejected_window)