def test_row_number_does_not_require_order_by(alltypes):
    """Compile a grouped ``row_number()`` with and without an ``order_by``.

    Each expression is handed to ``assert_sql_equal`` together with the SQL
    text it is expected to produce.
    """
    # NOTE(review): line breaks in the expected SQL were restored from a
    # whitespace-collapsed listing; confirm against the real compiler output.
    unordered_groups = alltypes.group_by(alltypes.g)
    unordered = unordered_groups.mutate(ibis.row_number().name('foo'))
    unordered_sql = """\
SELECT *, (row_number() OVER (PARTITION BY `g`) - 1) AS `foo`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(unordered, unordered_sql)

    ordered_groups = alltypes.group_by(alltypes.g).order_by(alltypes.f)
    ordered = ordered_groups.mutate(ibis.row_number().name('foo'))
    ordered_sql = """\
SELECT *, (row_number() OVER (PARTITION BY `g` ORDER BY `f`) - 1) AS `foo`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(ordered, ordered_sql)
def test_row_number_requires_order_by(self):
    # An unordered, grouped row_number() must raise ExpressionError; the
    # ordered variant is passed to self._check_sql with its expected SQL.
    # NOTE(review): line breaks in the expected SQL were restored from a
    # whitespace-collapsed listing; confirm against the real compiler output.
    alltypes = self.con.table('alltypes')

    with self.assertRaises(com.ExpressionError):
        alltypes.group_by(alltypes.g).mutate(ibis.row_number().name('foo'))

    ordered_groups = alltypes.group_by(alltypes.g).order_by(alltypes.f)
    expr = ordered_groups.mutate(ibis.row_number().name('foo'))

    expected = """\
SELECT *, row_number() OVER (PARTITION BY g ORDER BY f) - 1 AS `foo`
FROM alltypes"""
    self._check_sql(expr, expected)
def test_row_number_requires_order_by(con):
    """Expect ``ExpressionError`` without ordering; check SQL with ordering.

    The unordered grouped ``row_number()`` mutation must raise, while the
    ordered one is handed to ``assert_sql_equal`` with its expected SQL.
    """
    # NOTE(review): line breaks in the expected SQL were restored from a
    # whitespace-collapsed listing; confirm against the real compiler output.
    alltypes = con.table('alltypes')

    with pytest.raises(com.ExpressionError):
        alltypes.group_by(alltypes.g).mutate(ibis.row_number().name('foo'))

    ordered_groups = alltypes.group_by(alltypes.g).order_by(alltypes.f)
    expr = ordered_groups.mutate(ibis.row_number().name('foo'))

    expected = """\
SELECT *, row_number() OVER (PARTITION BY `g` ORDER BY `f`) - 1 AS `foo`
FROM alltypes"""
    assert_sql_equal(expr, expected)
def test_analytic_functions(self):
    # Execute one grouped, ordered mutation containing every analytic
    # expression below. The result is not inspected; the test only requires
    # that execution completes without raising.
    limited = self.alltypes.limit(1000)
    grouped = limited.group_by("string_col").order_by("double_col")
    col = limited.float_col

    exprs = [
        col.lag(),
        col.lead(),
        col.rank(),
        col.dense_rank(),
        col.first(),
        col.last(),
        col.first().over(ibis.window(preceding=10)),
        col.first().over(ibis.window(following=10)),
        ibis.row_number(),
        col.cumsum(),
        col.cummean(),
        col.cummin(),
        col.cummax(),
        # boolean cumulative reductions
        (col == 0).cumany(),
        (col == 0).cumall(),
        col.sum(),
        col.mean(),
        col.min(),
        col.max(),
    ]

    # Give every expression a unique positional name: e0, e1, ...
    named = []
    for position, expr in enumerate(exprs):
        named.append(expr.name("e%d" % position))

    grouped.mutate(named).execute()
def test_window_with_arithmetic(self):
    # Compare ``row_number() / 2`` over a timestamp-ordered window with a
    # frame built by hand from the sorted timestamp column.
    alltypes = self.alltypes
    by_timestamp = ibis.window(order_by=alltypes.timestamp_col)
    halved = ibis.row_number().over(by_timestamp) / 2
    expr = alltypes.mutate(new_col=halved)

    timestamps = alltypes.projection(['timestamp_col'])
    sorted_df = timestamps.sort_by('timestamp_col').execute()

    # Expected values are 0 / 2, 1 / 2, ... in timestamp order.
    halves = [position / 2. for position in range(len(sorted_df))]
    expected = sorted_df.assign(new_col=halves)

    result = expr['timestamp_col', 'new_col'].execute()
    tm.assert_frame_equal(result, expected)
def test_row_number_properly_composes_with_arithmetic(alltypes):
    """Check the SQL for ``row_number()`` over a window divided by 2.

    The expected text keeps the ``row_number() ... - 1`` term parenthesized
    ahead of the division.
    """
    # NOTE(review): line breaks in the expected SQL were restored from a
    # whitespace-collapsed listing; confirm against the real compiler output.
    by_f = ibis.window(order_by=alltypes.f)
    halved = ibis.row_number().over(by_f) / 2
    expr = alltypes.mutate(new=halved)

    expected = """\
SELECT *, (row_number() OVER (ORDER BY `f`) - 1) / 2 AS `new`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected)
def test_row_number_properly_composes_with_arithmetic(con):
    """Check the SQL for ``row_number()`` over a window divided by 2.

    The expected text keeps the ``row_number() ... - 1`` term parenthesized
    ahead of the division.
    """
    # NOTE(review): line breaks in the expected SQL were restored from a
    # whitespace-collapsed listing; confirm against the real compiler output.
    alltypes = con.table('alltypes')
    by_f = ibis.window(order_by=alltypes.f)
    halved = ibis.row_number().over(by_f) / 2
    expr = alltypes.mutate(new=halved)

    expected = """\
SELECT *, (row_number() OVER (ORDER BY `f`) - 1) / 2 AS `new`
FROM alltypes"""
    assert_sql_equal(expr, expected)
def test_window_with_arithmetic(alltypes, df):
    """Compare ``row_number() / 2`` over a timestamp window with pandas.

    The expected frame is built from ``df`` by sorting ``timestamp_col``,
    resetting the index and assigning ``0 / 2, 1 / 2, ...`` as ``new_col``.
    """
    by_timestamp = ibis.window(order_by=alltypes.timestamp_col)
    halved = ibis.row_number().over(by_timestamp) / 2
    expr = alltypes.mutate(new_col=halved)

    timestamps = df[['timestamp_col']]
    sorted_df = timestamps.sort_values('timestamp_col').reset_index(drop=True)
    halves = [position / 2.0 for position in range(len(sorted_df))]
    expected = sorted_df.assign(new_col=halves)

    result = expr['timestamp_col', 'new_col'].execute()
    tm.assert_frame_equal(result, expected)
def test_order_by_desc(alltypes):
    """Check that descending sort keys appear as ``DESC`` in window SQL.

    Covers an explicit window passed to ``over`` and a window implied by
    ``group_by(...).order_by(...)``.
    """
    # NOTE(review): line breaks and the continuation indent in the expected
    # SQL were restored from a whitespace-collapsed listing; confirm against
    # the real compiler output.
    descending_f = window(order_by=ibis.desc(alltypes.f))
    revrank = ibis.row_number().over(descending_f).name('revrank')
    proj = alltypes[alltypes.f, revrank]

    expected = """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(proj, expected)

    grouped = alltypes.group_by('g').order_by(ibis.desc(alltypes.f))
    expr = grouped[alltypes.d.lag().name('foo'), alltypes.a.max()]

    expected = """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected)
def test_order_by_desc(self):
    # Descending sort keys must show up as DESC in the window clause, both
    # for an explicit window and for one implied by group_by().order_by().
    # NOTE(review): line breaks and the continuation indent in the expected
    # SQL were restored from a whitespace-collapsed listing; confirm against
    # the real compiler output.
    alltypes = self.con.table('alltypes')

    descending_f = window(order_by=ibis.desc(alltypes.f))
    revrank = ibis.row_number().over(descending_f).name('revrank')
    proj = alltypes[alltypes.f, revrank]

    expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
    self._check_sql(proj, expected)

    grouped = alltypes.group_by('g').order_by(ibis.desc(alltypes.f))
    expr = grouped[alltypes.d.lag().name('foo'), alltypes.a.max()]

    expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
    self._check_sql(expr, expected)
def test_analytic_exprs(self):
    # Hand (expression, expected SQL fragment) pairs for the analytic
    # functions to self._check_expr_cases.
    table = self.table
    by_float = ibis.window(order_by=table.float_col)
    strings = table.string_col
    doubles = table.double_col

    row_number_cases = [
        (
            ibis.row_number().over(by_float),
            "row_number() OVER (ORDER BY `float_col`) - 1",
        ),
    ]
    shift_cases = [
        (strings.lag(), "lag(`string_col`)"),
        (strings.lag(2), "lag(`string_col`, 2)"),
        (strings.lag(default=0), "lag(`string_col`, 1, 0)"),
        (strings.lead(), "lead(`string_col`)"),
        (strings.lead(2), "lead(`string_col`, 2)"),
        (strings.lead(default=0), "lead(`string_col`, 1, 0)"),
    ]
    endpoint_cases = [
        (doubles.first(), "first_value(`double_col`)"),
        (doubles.last(), "last_value(`double_col`)"),
        # Disabled in the original listing:
        # (t.double_col.nth(4), 'first_value(lag(double_col, 4 - 1))')
    ]

    self._check_expr_cases(row_number_cases + shift_cases + endpoint_cases)
def test_order_by_desc(self):
    # Descending sort keys must show up as DESC in the window clause, both
    # for an explicit window and for one implied by group_by().order_by().
    # NOTE(review): line breaks and the continuation indent in the expected
    # SQL were restored from a whitespace-collapsed listing; confirm against
    # the real compiler output.
    source = self.con.table('alltypes')

    reverse_window = window(order_by=ibis.desc(source.f))
    numbered = ibis.row_number().over(reverse_window)
    proj = source[source.f, numbered.name('revrank')]

    expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
    self._check_sql(proj, expected)

    by_g_desc_f = source.group_by('g').order_by(ibis.desc(source.f))
    expr = by_g_desc_f[source.d.lag().name('foo'), source.a.max()]

    expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
    self._check_sql(expr, expected)
def test_analytic_exprs(self):
    # Hand (expression, expected SQL fragment) pairs for the analytic
    # functions to self._check_expr_cases.
    table = self.table
    by_float = ibis.window(order_by=table.float_col)
    strings = table.string_col
    doubles = table.double_col

    row_number_cases = [
        (
            ibis.row_number().over(by_float),
            'row_number() OVER (ORDER BY `float_col`) - 1',
        ),
    ]
    shift_cases = [
        (strings.lag(), 'lag(`string_col`)'),
        (strings.lag(2), 'lag(`string_col`, 2)'),
        (strings.lag(default=0), 'lag(`string_col`, 1, 0)'),
        (strings.lead(), 'lead(`string_col`)'),
        (strings.lead(2), 'lead(`string_col`, 2)'),
        (strings.lead(default=0), 'lead(`string_col`, 1, 0)'),
    ]
    endpoint_cases = [
        (doubles.first(), 'first_value(`double_col`)'),
        (doubles.last(), 'last_value(`double_col`)'),
        # Disabled in the original listing:
        # (t.double_col.nth(4), 'first_value(lag(double_col, 4 - 1))')
    ]

    self._check_expr_cases(row_number_cases + shift_cases + endpoint_cases)
def test_order_by_desc(alltypes):
    """Check that descending sort keys appear as ``DESC`` in window SQL.

    Covers an explicit window passed to ``over`` and a window implied by
    ``group_by(...).order_by(...)``.
    """
    # NOTE(review): line breaks and the continuation indent in the expected
    # SQL were restored from a whitespace-collapsed listing; confirm against
    # the real compiler output.
    reverse_window = window(order_by=ibis.desc(alltypes.f))
    numbered = ibis.row_number().over(reverse_window)
    proj = alltypes[alltypes.f, numbered.name('revrank')]

    expected = """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(proj, expected)

    by_g_desc_f = alltypes.group_by('g').order_by(ibis.desc(alltypes.f))
    expr = by_g_desc_f[alltypes.d.lag().name('foo'), alltypes.a.max()]

    expected = """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected)
def test_analytic_functions(alltypes):
    """Execute a grouped, ordered mutation of many analytic expressions.

    The result is not inspected; the test only requires that execution
    completes without raising.
    """
    limited = alltypes.limit(1000)
    grouped = limited.group_by('string_col').order_by('double_col')
    col = limited.float_col

    exprs = [
        col.lag(),
        col.lead(),
        col.rank(),
        col.dense_rank(),
        col.percent_rank(),
        col.ntile(buckets=7),
        col.first(),
        col.last(),
        col.first().over(ibis.window(preceding=10)),
        col.first().over(ibis.window(following=10)),
        ibis.row_number(),
        col.cumsum(),
        col.cummean(),
        col.cummin(),
        col.cummax(),
        # boolean cumulative reductions
        (col == 0).cumany(),
        (col == 0).cumall(),
        col.sum(),
        col.mean(),
        col.min(),
        col.max(),
    ]

    # Give every expression a unique positional name: e0, e1, ...
    named = []
    for position, expr in enumerate(exprs):
        named.append(expr.name('e%d' % position))

    grouped.mutate(named).execute()
lambda t: t, id='ntile', marks=pytest.mark.xfail, ), param( lambda t, win: t.float_col.first().over(win), lambda t: t.float_col.transform('first'), id='first', ), param( lambda t, win: t.float_col.last().over(win), lambda t: t.float_col.transform('last'), id='last', ), param( lambda t, win: ibis.row_number().over(win), lambda t: t.cumcount(), id='row_number', marks=pytest.mark.xfail_backends( ('pandas', 'dask'), raises=(IndexError, com.UnboundExpressionError), ), ), param( lambda t, win: t.double_col.cumsum().over(win), lambda t: t.double_col.cumsum(), id='cumsum', ), param( lambda t, win: t.double_col.cummean().over(win), lambda t:
marks=pytest.mark.xfail, ), param( lambda t: t.float_col.first().over(ibis.window(preceding=10)), lambda t: t, id='first_preceding', marks=pytest.mark.xfail, ), param( lambda t: t.float_col.first().over(ibis.window(following=10)), lambda t: t, id='first_following', marks=pytest.mark.xfail, ), param( lambda t: ibis.row_number(), lambda t: pd.Series(np.arange(len(t))), id='row_number', marks=pytest.mark.xfail, ), param( lambda t: t.double_col.cumsum(), lambda t: t.double_col.cumsum(), id='cumsum', ), param( lambda t: t.double_col.cummean(), lambda t: t.double_col.expanding() .mean() .reset_index(drop=True, level=0), id='cummean',
import pytest import ibis from ibis.backends.impala.tests.conftest import translate @pytest.fixture(scope="module") def table(mockcon): return mockcon.table("functional_alltypes") @pytest.mark.parametrize( ("expr_fn", "expected"), [ pytest.param( lambda t: ibis.row_number().over(ibis.window(order_by=t.float_col) ), '(row_number() OVER (ORDER BY `float_col`) - 1)', ), pytest.param(lambda t: t.string_col.lag(), 'lag(`string_col`)', id="lag_default"), pytest.param(lambda t: t.string_col.lag(2), 'lag(`string_col`, 2)', id="lag_arg"), pytest.param( lambda t: t.string_col.lag(default=0), 'lag(`string_col`, 1, 0)', id="lag_explicit_default", ), pytest.param( lambda t: t.string_col.lead(),