예제 #1
0
def test_row_number_does_not_require_order_by(alltypes):
    """Compile ``row_number`` over a grouped table, without and with ordering.

    Each expression is compared against literal SQL via ``assert_sql_equal``:
    the first window only has PARTITION BY, the second adds ORDER BY.
    """
    partition_only = alltypes.group_by(alltypes.g)
    unordered_expr = partition_only.mutate(ibis.row_number().name('foo'))
    unordered_sql = """\
SELECT *, (row_number() OVER (PARTITION BY `g`) - 1) AS `foo`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(unordered_expr, unordered_sql)

    partition_and_order = alltypes.group_by(alltypes.g).order_by(alltypes.f)
    ordered_expr = partition_and_order.mutate(ibis.row_number().name('foo'))
    ordered_sql = """\
SELECT *, (row_number() OVER (PARTITION BY `g` ORDER BY `f`) - 1) AS `foo`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(ordered_expr, ordered_sql)
예제 #2
0
    def test_row_number_requires_order_by(self):
        t = self.con.table('alltypes')

        with self.assertRaises(com.ExpressionError):
            (t.group_by(t.g)
             .mutate(ibis.row_number().name('foo')))

        expr = (t.group_by(t.g)
                .order_by(t.f)
                .mutate(ibis.row_number().name('foo')))

        expected = """\
SELECT *, row_number() OVER (PARTITION BY g ORDER BY f) - 1 AS `foo`
FROM alltypes"""
        self._check_sql(expr, expected)
예제 #3
0
def test_row_number_requires_order_by(con):
    """Check ``row_number`` raises without ordering and compiles with it.

    The unordered, grouped form is expected to raise
    ``com.ExpressionError``; the ordered form is compared against literal
    SQL via ``assert_sql_equal``.
    """
    alltypes = con.table('alltypes')

    with pytest.raises(com.ExpressionError):
        # The whole chain stays inside the context manager so that an error
        # raised at any step of building it is the one asserted.
        alltypes.group_by(alltypes.g).mutate(ibis.row_number().name('foo'))

    ordered_group = alltypes.group_by(alltypes.g).order_by(alltypes.f)
    numbered = ordered_group.mutate(ibis.row_number().name('foo'))

    expected_sql = """\
SELECT *, row_number() OVER (PARTITION BY `g` ORDER BY `f`) - 1 AS `foo`
FROM alltypes"""
    assert_sql_equal(numbered, expected_sql)
예제 #4
0
    def test_analytic_functions(self):
        # Smoke test: build one projection holding a range of analytic,
        # cumulative and reduction expressions over a grouped and ordered
        # table, then execute it. Nothing is asserted about the result.
        limited = self.alltypes.limit(1000)
        grouped = limited.group_by("string_col").order_by("double_col")
        col = limited.float_col

        analytic_exprs = [
            col.lag(),
            col.lead(),
            col.rank(),
            col.dense_rank(),
            col.first(),
            col.last(),
            col.first().over(ibis.window(preceding=10)),
            col.first().over(ibis.window(following=10)),
            ibis.row_number(),
            col.cumsum(),
            col.cummean(),
            col.cummin(),
            col.cummax(),
            # boolean cumulative reductions
            (col == 0).cumany(),
            (col == 0).cumall(),
            col.sum(),
            col.mean(),
            col.min(),
            col.max(),
        ]

        # Name each expression e0, e1, ... by its position in the list.
        named_exprs = []
        for position, analytic_expr in enumerate(analytic_exprs):
            named_exprs.append(analytic_expr.name("e{:d}".format(position)))

        projected = grouped.mutate(named_exprs)
        projected.execute()
예제 #5
0
    def test_window_with_arithmetic(self):
        # Execute row_number over a window ordered by timestamp_col, divided
        # by 2, and compare it with a frame built from the sorted
        # timestamp column.
        alltypes = self.alltypes
        by_timestamp = ibis.window(order_by=alltypes.timestamp_col)
        half_row_number = ibis.row_number().over(by_timestamp) / 2
        expr = alltypes.mutate(new_col=half_row_number)

        timestamps = alltypes.projection(['timestamp_col'])
        df = timestamps.sort_by('timestamp_col').execute()
        # Expected values are 0, 0.5, 1.0, ... one per row of df.
        halves = [n / 2. for n in range(len(df))]
        expected = df.assign(new_col=halves)

        result = expr['timestamp_col', 'new_col'].execute()
        tm.assert_frame_equal(result, expected)
예제 #6
0
파일: test_window.py 프로젝트: loudinb/ibis
def test_row_number_properly_composes_with_arithmetic(alltypes):
    """Check the SQL produced when a windowed ``row_number`` is divided.

    The expected SQL wraps ``row_number() OVER (...) - 1`` in parentheses
    before applying ``/ 2``.
    """
    ordered_window = ibis.window(order_by=alltypes.f)
    halved = ibis.row_number().over(ordered_window) / 2
    expr = alltypes.mutate(new=halved)

    expected_sql = """\
SELECT *, (row_number() OVER (ORDER BY `f`) - 1) / 2 AS `new`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected_sql)
예제 #7
0
def test_row_number_properly_composes_with_arithmetic(con):
    """Check the SQL produced when a windowed ``row_number`` is divided.

    The table is looked up on ``con``; the expected SQL wraps
    ``row_number() OVER (...) - 1`` in parentheses before applying ``/ 2``.
    """
    alltypes = con.table('alltypes')
    ordered_window = ibis.window(order_by=alltypes.f)
    halved = ibis.row_number().over(ordered_window) / 2
    expr = alltypes.mutate(new=halved)

    expected_sql = """\
SELECT *, (row_number() OVER (ORDER BY `f`) - 1) / 2 AS `new`
FROM alltypes"""
    assert_sql_equal(expr, expected_sql)
예제 #8
0
    def test_window_with_arithmetic(self):
        # Compare an executed "row_number / 2" column (window ordered by
        # timestamp_col) against values computed from the sorted
        # timestamp column.
        source = self.alltypes
        ts_window = ibis.window(order_by=source.timestamp_col)
        expr = source.mutate(new_col=ibis.row_number().over(ts_window) / 2)

        only_timestamps = source.projection(['timestamp_col'])
        sorted_timestamps = only_timestamps.sort_by('timestamp_col')
        df = sorted_timestamps.execute()
        # One expected value per row: 0, 0.5, 1.0, ...
        expected = df.assign(
            new_col=[row / 2. for row in range(len(df))])

        selected = expr['timestamp_col', 'new_col']
        result = selected.execute()
        tm.assert_frame_equal(result, expected)
예제 #9
0
def test_row_number_properly_composes_with_arithmetic(alltypes):
    """Compare ``row_number().over(w) / 2`` against its expected SQL.

    In the expected SQL the ``row_number() OVER (...) - 1`` term is
    parenthesized ahead of the division.
    """
    window_by_f = ibis.window(order_by=alltypes.f)
    row_number_over_f = ibis.row_number().over(window_by_f)
    expr = alltypes.mutate(new=row_number_over_f / 2)

    assert_sql_equal(
        expr,
        """\
SELECT *, (row_number() OVER (ORDER BY `f`) - 1) / 2 AS `new`
FROM ibis_testing.`alltypes`""",
    )
예제 #10
0
def test_window_with_arithmetic(alltypes, df):
    """Execute ``row_number / 2`` over a timestamp-ordered window.

    The expected frame is built from ``df``: its ``timestamp_col`` column,
    sorted with a fresh index, plus a ``new_col`` holding ``i / 2.0`` for
    each row position ``i``.
    """
    by_timestamp = ibis.window(order_by=alltypes.timestamp_col)
    expr = alltypes.mutate(new_col=ibis.row_number().over(by_timestamp) / 2)

    timestamps = df[['timestamp_col']]
    sorted_df = timestamps.sort_values('timestamp_col').reset_index(drop=True)
    halves = [position / 2.0 for position in range(len(sorted_df))]
    expected = sorted_df.assign(new_col=halves)

    result = expr['timestamp_col', 'new_col'].execute()
    tm.assert_frame_equal(result, expected)
예제 #11
0
def test_order_by_desc(alltypes):
    """Check that descending sort keys appear as ``DESC`` in window SQL.

    Covers an explicit window passed to ``over`` and a window implied by
    ``group_by(...).order_by(...)``; both are compared with literal SQL.
    """
    descending_f = window(order_by=ibis.desc(alltypes.f))
    rev_rank = ibis.row_number().over(descending_f).name('revrank')
    ranked = alltypes[alltypes.f, rev_rank]
    ranked_sql = """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(ranked, ranked_sql)

    grouped_desc = alltypes.group_by('g').order_by(ibis.desc(alltypes.f))
    lag_and_max = grouped_desc[alltypes.d.lag().name('foo'),
                               alltypes.a.max()]
    lag_and_max_sql = """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(lag_and_max, lag_and_max_sql)
예제 #12
0
    def test_order_by_desc(self):
        t = self.con.table('alltypes')

        w = window(order_by=ibis.desc(t.f))

        proj = t[t.f, ibis.row_number().over(w).name('revrank')]
        expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
        self._check_sql(proj, expected)

        expr = (t.group_by('g').order_by(ibis.desc(t.f))[t.d.lag().name('foo'),
                                                         t.a.max()])
        expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
        self._check_sql(expr, expected)
예제 #13
0
    def test_analytic_exprs(self):
        # Pairs each analytic expression with the SQL fragment it is expected
        # to translate to, then hands the pairs to self._check_expr_cases.
        table = self.table
        strings = table.string_col
        doubles = table.double_col

        by_float = ibis.window(order_by=table.float_col)
        row_number_case = (
            ibis.row_number().over(by_float),
            "row_number() OVER (ORDER BY `float_col`) - 1",
        )

        cases = [
            row_number_case,
            (strings.lag(), "lag(`string_col`)"),
            (strings.lag(2), "lag(`string_col`, 2)"),
            (strings.lag(default=0), "lag(`string_col`, 1, 0)"),
            (strings.lead(), "lead(`string_col`)"),
            (strings.lead(2), "lead(`string_col`, 2)"),
            (strings.lead(default=0), "lead(`string_col`, 1, 0)"),
            (doubles.first(), "first_value(`double_col`)"),
            (doubles.last(), "last_value(`double_col`)"),
            # NOTE(review): the nth() case below is disabled; the reason is
            # not visible in this snippet.
            # (t.double_col.nth(4), 'first_value(lag(double_col, 4 - 1))')
        ]
        self._check_expr_cases(cases)
예제 #14
0
    def test_order_by_desc(self):
        t = self.con.table('alltypes')

        w = window(order_by=ibis.desc(t.f))

        proj = t[t.f, ibis.row_number().over(w).name('revrank')]
        expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
        self._check_sql(proj, expected)

        expr = (t.group_by('g')
                .order_by(ibis.desc(t.f))
                [t.d.lag().name('foo'), t.a.max()])
        expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
        self._check_sql(expr, expected)
예제 #15
0
    def test_analytic_exprs(self):
        # Builds (expression, expected SQL fragment) pairs for row_number,
        # lag, lead, first and last, and passes them to
        # self._check_expr_cases.
        source = self.table
        float_window = ibis.window(order_by=source.float_col)
        numbered = ibis.row_number().over(float_window)

        string_col = source.string_col
        double_col = source.double_col

        cases = [
            (numbered, 'row_number() OVER (ORDER BY `float_col`) - 1'),
            (string_col.lag(), 'lag(`string_col`)'),
            (string_col.lag(2), 'lag(`string_col`, 2)'),
            (string_col.lag(default=0), 'lag(`string_col`, 1, 0)'),
            (string_col.lead(), 'lead(`string_col`)'),
            (string_col.lead(2), 'lead(`string_col`, 2)'),
            (string_col.lead(default=0), 'lead(`string_col`, 1, 0)'),
            (double_col.first(), 'first_value(`double_col`)'),
            (double_col.last(), 'last_value(`double_col`)'),
            # NOTE(review): the nth() case below is disabled; the reason is
            # not visible in this snippet.
            # (t.double_col.nth(4), 'first_value(lag(double_col, 4 - 1))')
        ]
        self._check_expr_cases(cases)
예제 #16
0
def test_order_by_desc(alltypes):
    """Verify ``ibis.desc`` sort keys are rendered as ``DESC`` in window SQL.

    The first case uses an explicit window with ``row_number``; the second
    selects ``lag`` and ``max`` from a grouped, descending-ordered table.
    """
    desc_window = window(order_by=ibis.desc(alltypes.f))
    reverse_rank = ibis.row_number().over(desc_window).name('revrank')
    assert_sql_equal(
        alltypes[alltypes.f, reverse_rank],
        """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`""",
    )

    grouped = alltypes.group_by('g')
    grouped_desc = grouped.order_by(ibis.desc(alltypes.f))
    selection = grouped_desc[alltypes.d.lag().name('foo'), alltypes.a.max()]
    assert_sql_equal(
        selection,
        """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`""",
    )
예제 #17
0
def test_analytic_functions(alltypes):
    """Smoke-test a projection of many analytic expressions.

    The expressions are named ``e0``, ``e1``, ... by list position, added to
    a grouped and ordered table with ``mutate``, and executed. Nothing is
    asserted about the result.
    """
    limited = alltypes.limit(1000)
    grouped = limited.group_by('string_col').order_by('double_col')
    col = limited.float_col

    analytic_exprs = [
        # offset and ranking functions
        col.lag(),
        col.lead(),
        col.rank(),
        col.dense_rank(),
        col.percent_rank(),
        col.ntile(buckets=7),
        # first/last, with the default and with explicit windows
        col.first(),
        col.last(),
        col.first().over(ibis.window(preceding=10)),
        col.first().over(ibis.window(following=10)),
        # row number and cumulative functions
        ibis.row_number(),
        col.cumsum(),
        col.cummean(),
        col.cummin(),
        col.cummax(),
        # boolean cumulative reductions
        (col == 0).cumany(),
        (col == 0).cumall(),
        # plain reductions
        col.sum(),
        col.mean(),
        col.min(),
        col.max(),
    ]

    named_exprs = []
    for position, analytic_expr in enumerate(analytic_exprs):
        named_exprs.append(analytic_expr.name('e{:d}'.format(position)))

    projected = grouped.mutate(named_exprs)
    projected.execute()
예제 #18
0
파일: test_window.py 프로젝트: jelitox/ibis
     lambda t: t,
     id='ntile',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t, win: t.float_col.first().over(win),
     lambda t: t.float_col.transform('first'),
     id='first',
 ),
 param(
     lambda t, win: t.float_col.last().over(win),
     lambda t: t.float_col.transform('last'),
     id='last',
 ),
 param(
     lambda t, win: ibis.row_number().over(win),
     lambda t: t.cumcount(),
     id='row_number',
     marks=pytest.mark.xfail_backends(
         ('pandas', 'dask'),
         raises=(IndexError, com.UnboundExpressionError),
     ),
 ),
 param(
     lambda t, win: t.double_col.cumsum().over(win),
     lambda t: t.double_col.cumsum(),
     id='cumsum',
 ),
 param(
     lambda t, win: t.double_col.cummean().over(win),
     lambda t:
예제 #19
0
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: t.float_col.first().over(ibis.window(preceding=10)),
     lambda t: t,
     id='first_preceding',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: t.float_col.first().over(ibis.window(following=10)),
     lambda t: t,
     id='first_following',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: ibis.row_number(),
     lambda t: pd.Series(np.arange(len(t))),
     id='row_number',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: t.double_col.cumsum(),
     lambda t: t.double_col.cumsum(),
     id='cumsum',
 ),
 param(
     lambda t: t.double_col.cummean(),
     lambda t: t.double_col.expanding()
     .mean()
     .reset_index(drop=True, level=0),
     id='cummean',
예제 #20
0
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: t.float_col.first().over(ibis.window(preceding=10)),
     lambda t: t,
     id='first_preceding',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: t.float_col.first().over(ibis.window(following=10)),
     lambda t: t,
     id='first_following',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: ibis.row_number(),
     lambda t: pd.Series(np.arange(len(t))),
     id='row_number',
     marks=pytest.mark.xfail,
 ),
 param(
     lambda t: t.double_col.cumsum(),
     lambda t: t.double_col.cumsum(),
     id='cumsum',
 ),
 param(
     lambda t: t.double_col.cummean(),
     lambda t: t.double_col.expanding()
     .mean()
     .reset_index(drop=True, level=0),
     id='cummean',
예제 #21
0
import pytest

import ibis
from ibis.backends.impala.tests.conftest import translate


@pytest.fixture(scope="module")
def table(mockcon):
    """Module-scoped fixture: the ``functional_alltypes`` table of ``mockcon``."""
    functional_alltypes = mockcon.table("functional_alltypes")
    return functional_alltypes


@pytest.mark.parametrize(
    ("expr_fn", "expected"),
    [
        pytest.param(
            lambda t: ibis.row_number().over(ibis.window(order_by=t.float_col)
                                             ),
            '(row_number() OVER (ORDER BY `float_col`) - 1)',
        ),
        pytest.param(lambda t: t.string_col.lag(),
                     'lag(`string_col`)',
                     id="lag_default"),
        pytest.param(lambda t: t.string_col.lag(2),
                     'lag(`string_col`, 2)',
                     id="lag_arg"),
        pytest.param(
            lambda t: t.string_col.lag(default=0),
            'lag(`string_col`, 1, 0)',
            id="lag_explicit_default",
        ),
        pytest.param(
            lambda t: t.string_col.lead(),