def test_range_window_function(alltypes, project_id):
    """Check the SQL compiled for two grouped RANGE window specifications.

    The first window orders by ``month`` with ``preceding=1, following=0``
    and is expected to render as ``RANGE BETWEEN 1 PRECEDING AND CURRENT
    ROW``.  The second orders by ``timestamp_col`` with ``preceding=(4, 2)``
    and is expected to render the order key wrapped in ``UNIX_MICROS`` with
    both frame bounds expressed as ``PRECEDING``.
    """
    # Window ordered by a plain column.
    month_window = ibis.range_window(
        preceding=1, following=0, group_by='year', order_by='month'
    )
    month_avg = alltypes.float_col.mean().over(month_window)
    compiled = alltypes.mutate(two_month_avg=month_avg).compile()
    expected_sql = """\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg`
FROM `{}.testing.functional_alltypes`""".format(  # noqa: E501
        project_id
    )
    assert compiled == expected_sql

    # Window ordered by a timestamp column, with a (start, end) preceding pair.
    timestamp_window = ibis.range_window(
        preceding=(4, 2), group_by='year', order_by='timestamp_col'
    )
    timestamp_avg = alltypes.float_col.mean().over(timestamp_window)
    compiled = alltypes.mutate(win_avg=timestamp_avg).compile()
    expected_sql = """\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
FROM `{}.testing.functional_alltypes`""".format(  # noqa: E501
        project_id
    )
    assert compiled == expected_sql
def test_range_window_function(alltypes, project_id):
    """Verify the compiled SQL text for grouped RANGE windows.

    Two cases are compiled and compared against literal SQL: a window
    ordered by ``month`` (``preceding=1, following=0``) and a window ordered
    by ``timestamp_col`` (``preceding=(4, 2)``), whose expected ORDER BY key
    is wrapped in ``UNIX_MICROS``.
    """
    table_name = project_id  # interpolated into the expected FROM clause

    # Case 1: integer-like ordering key.
    by_month = ibis.range_window(
        preceding=1, following=0, group_by='year', order_by='month'
    )
    windowed = alltypes.mutate(
        two_month_avg=alltypes.float_col.mean().over(by_month)
    )
    actual_sql = windowed.compile()
    wanted_sql = f"""\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg`
FROM `{table_name}.testing.functional_alltypes`"""  # noqa: E501
    assert actual_sql == wanted_sql

    # Case 2: timestamp ordering key with both bounds preceding the row.
    by_timestamp = ibis.range_window(
        preceding=(4, 2), group_by='year', order_by='timestamp_col'
    )
    windowed = alltypes.mutate(
        win_avg=alltypes.float_col.mean().over(by_timestamp)
    )
    actual_sql = windowed.compile()
    wanted_sql = f"""\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
FROM `{table_name}.testing.functional_alltypes`"""  # noqa: E501
    assert actual_sql == wanted_sql
def test_grouped_bounded_range_window(backend, alltypes, df):
    """Compare a grouped, bounded range-window sum with a pandas reference.

    Window spec under test:

    * ``preceding=10, following=0, order_by='id'``: the window at a row
      whose ``id`` is x contains another row whose ``id`` is y when
      ``x - 10 <= y <= x``.
    * ``group_by='string_col'``: the window only contains rows sharing the
      current row's ``string_col`` value.
    """
    preceding = 10
    range_spec = ibis.range_window(
        preceding=preceding,
        following=0,
        order_by='id',
        group_by='string_col',
    )
    windowed_sum = alltypes.double_col.sum().over(range_spec)
    executed = alltypes.mutate(val=windowed_sum).execute()
    result = executed.set_index('id').sort_index()

    def sum_within_bounds(group):
        # For each row, locate the slice [start, stop) of the id-sorted group
        # that lies inside the row's [prec, foll) bounds, then sum it.
        starts, stops = np.searchsorted(
            group.id, [group["prec"], group["foll"]], side="left"
        )
        values = group.double_col.values
        sums = [values[start:stop].sum() for start, stop in zip(starts, stops)]
        return pd.Series(sums, index=group.index)

    # `foll` is id + 1 so that a single left-sided searchsorted call yields
    # the inclusive upper bound as well as the lower bound.
    with_bounds = df.assign(
        prec=lambda t: t.id - preceding, foll=lambda t: t.id + 1
    )
    per_group = (
        with_bounds.sort_values("id").groupby("string_col").apply(sum_within_bounds)
    )
    reference_vals = per_group.droplevel(0)

    # Attach the reference sums as `val`, mimicking the range window spec.
    expected = df.assign(val=reference_vals).set_index('id').sort_index()

    backend.assert_series_equal(result.val, expected.val)
def test_combine_windows(alltypes):
    """Exercise ``combine`` on window specifications.

    Combining is expected to carry over the frame bounds and to accumulate
    ``group_by`` / ``order_by`` keys; combining a window made by
    ``ibis.window`` with one made by ``ibis.range_window`` is expected to
    raise ``IbisInputError``.
    """
    keyed = ibis.window(group_by=alltypes.g, order_by=alltypes.f)
    bounded = ibis.window(preceding=5, following=5)

    # Keys from one window plus bounds from the other.
    keyed_and_bounded = keyed.combine(bounded)
    assert_equal(
        keyed_and_bounded,
        ibis.window(
            group_by=alltypes.g, order_by=alltypes.f, preceding=5, following=5
        ),
    )

    # A second keyed window appends its keys after the existing ones.
    more_keys = ibis.window(group_by=alltypes.a, order_by=alltypes.e)
    merged = keyed_and_bounded.combine(more_keys)
    assert_equal(
        merged,
        ibis.window(
            group_by=[alltypes.g, alltypes.a],
            order_by=[alltypes.f, alltypes.e],
            preceding=5,
            following=5,
        ),
    )

    # Cannot combine windows of varying types.
    range_based = ibis.range_window(preceding=5, following=5)
    with pytest.raises(ibis.common.IbisInputError):
        keyed.combine(range_based)
def test_combine_windows(alltypes):
    """Check how window specifications merge through ``combine``.

    Covers three situations: keys combined with frame bounds, two keyed
    windows whose ``group_by`` / ``order_by`` entries are concatenated, and
    the error raised when an ``ibis.window`` is combined with an
    ``ibis.range_window``.
    """
    table = alltypes

    grouped = ibis.window(group_by=table.g, order_by=table.f)
    framed = ibis.window(preceding=5, following=5)

    # Grouping/ordering keys and frame bounds end up in one window.
    grouped_framed = grouped.combine(framed)
    want = ibis.window(
        group_by=table.g, order_by=table.f, preceding=5, following=5
    )
    assert_equal(grouped_framed, want)

    # Extra keys are appended in combination order; bounds are retained.
    extra = ibis.window(group_by=table.a, order_by=table.e)
    everything = grouped_framed.combine(extra)
    want = ibis.window(
        group_by=[table.g, table.a],
        order_by=[table.f, table.e],
        preceding=5,
        following=5,
    )
    assert_equal(everything, want)

    # Cannot combine windows of varying types.
    ranged = ibis.range_window(preceding=5, following=5)
    with pytest.raises(ibis.common.exceptions.IbisInputError):
        grouped.combine(ranged)
def test_grouped_bounded_range_window(backend, alltypes, df, con):
    """Compare a grouped, bounded range-window sum with a row-wise reference.

    The test is skipped when ``backend.supports_window_operations`` is
    falsy.

    Window spec under test:

    * ``preceding=10, following=0, order_by='id'``: the window at a row
      whose ``id`` is x contains another row whose ``id`` is y when
      ``x - 10 <= y <= x``.
    * ``group_by='string_col'``: the window only contains rows sharing the
      current row's ``string_col`` value.
    """
    if not backend.supports_window_operations:
        skip_reason = 'Backend {} does not support window operations'.format(
            backend
        )
        pytest.skip(skip_reason)

    range_spec = ibis.range_window(
        preceding=10,
        following=0,
        order_by='id',
        group_by='string_col',
    )
    windowed_sum = alltypes.double_col.sum().over(range_spec)
    result = alltypes.mutate(val=windowed_sum).execute()
    result = result.set_index('id').sort_index()

    def reference_sum(row):
        # Mimic the range window spec for a single row of `df`.
        same_group = df.string_col == row.string_col  # group_by='string_col'
        not_too_old = (row.id - 10) <= df.id  # preceding=10
        not_ahead = df.id <= row.id  # following=0
        return df.double_col[same_group & not_too_old & not_ahead].sum()

    reference_vals = df.apply(reference_sum, axis=1)
    expected = df.assign(val=reference_vals).set_index('id').sort_index()

    backend.assert_series_equal(result.val, expected.val)
group_by='key', ), (-3600, 0), ), param( ibis.trailing_window( preceding=ibis.interval(hours=2), order_by='time', group_by='key', ), (-7200, 0), ), param( ibis.range_window( preceding=0, following=ibis.interval(hours=1), order_by='time', group_by='key', ), (0, 3600), ), ], ) def test_time_indexed_window(client, ibis_window, spark_range): table = client.table('time_indexed_table') result = table.mutate( mean=table['value'].mean().over(ibis_window)).compile() result_pd = result.toPandas() spark_table = table.compile() spark_window = (Window.partitionBy('key').orderBy( F.col('time').cast('long')).rangeBetween(*spark_range)) expected = spark_table.withColumn(