Exemple #1
0
def test_range_window_function(alltypes, project_id):
    """Check the SQL compiled for grouped ``RANGE`` windows.

    Two window specs are compiled against ``alltypes``: one ordered by
    ``month`` with ``preceding=1, following=0`` and one ordered by
    ``timestamp_col`` with ``preceding=(4, 2)``. Each compiled query is
    compared verbatim with the expected SQL text, into which
    ``project_id`` is formatted as the table qualifier.
    """
    table = alltypes

    # Frame running from one unit before the current row to the current row.
    month_window = ibis.range_window(
        group_by='year', order_by='month', preceding=1, following=0
    )
    month_mean = table.float_col.mean().over(month_window)
    compiled_sql = table.mutate(two_month_avg=month_mean).compile()
    expected_sql = """\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg`
FROM `{}.testing.functional_alltypes`""".format(  # noqa: E501
        project_id
    )
    assert compiled_sql == expected_sql

    # A (4, 2) tuple bounds both ends of the frame on the preceding side;
    # the expected SQL orders by UNIX_MICROS(`timestamp_col`).
    timestamp_window = ibis.range_window(
        group_by='year', order_by='timestamp_col', preceding=(4, 2)
    )
    timestamp_mean = table.float_col.mean().over(timestamp_window)
    compiled_sql = table.mutate(win_avg=timestamp_mean).compile()
    expected_sql = """\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
FROM `{}.testing.functional_alltypes`""".format(  # noqa: E501
        project_id
    )
    assert compiled_sql == expected_sql
Exemple #2
0
def test_range_window_function(alltypes, project_id):
    """Verify the compiled SQL for two grouped range-window averages.

    The first window orders by ``month`` with a one-preceding-to-current
    frame; the second orders by ``timestamp_col`` with a ``(4, 2)``
    preceding frame. Each compiled query must equal the expected SQL
    text exactly, with ``project_id`` interpolated as the table qualifier.
    """
    table = alltypes

    # Window covering the previous unit through the current row.
    by_month = ibis.range_window(
        preceding=1,
        following=0,
        group_by='year',
        order_by='month',
    )
    averaged = table.mutate(two_month_avg=table.float_col.mean().over(by_month))
    compiled_sql = averaged.compile()
    expected_sql = f"""\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg`
FROM `{project_id}.testing.functional_alltypes`"""  # noqa: E501
    assert compiled_sql == expected_sql

    # Window bounded on both ends by preceding offsets; the expected SQL
    # wraps the timestamp ordering column in UNIX_MICROS.
    by_timestamp = ibis.range_window(
        preceding=(4, 2),
        group_by='year',
        order_by='timestamp_col',
    )
    averaged = table.mutate(win_avg=table.float_col.mean().over(by_timestamp))
    compiled_sql = averaged.compile()
    expected_sql = f"""\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
FROM `{project_id}.testing.functional_alltypes`"""  # noqa: E501
    assert compiled_sql == expected_sql
Exemple #3
0
def test_grouped_bounded_range_window(backend, alltypes, df):
    """Compare a grouped, bounded range-window sum with a pandas reference.

    The ibis expression sums ``double_col`` over a range window and is
    executed on the backend; the reference result is rebuilt from ``df``
    with ``np.searchsorted`` inside each ``string_col`` group. Both are
    indexed and sorted by ``id`` before their ``val`` columns are compared.
    """
    # How to read the range window spec below:
    #
    # `preceding=10, following=0, order_by='id'`:
    #     For a row whose `id` is x, the window holds every row whose
    #     `id` value y satisfies x-10 <= y <= x.
    # `group_by='string_col'`:
    #     Only rows sharing the current row's 'string_col' value are
    #     eligible for the window.
    preceding = 10
    range_spec = ibis.range_window(
        group_by='string_col',
        order_by='id',
        preceding=preceding,
        following=0,
    )
    windowed_sum = alltypes.double_col.sum().over(range_spec)
    executed = alltypes.mutate(val=windowed_sum).execute()
    result = executed.set_index('id').sort_index()

    def gb_fn(group):
        # Locate, for every row, the [start, stop) slice of this group's
        # rows whose ids fall between that row's "prec" and "foll" bounds.
        # The caller sorts by id before grouping.
        bounds = np.searchsorted(
            group.id, [group["prec"], group["foll"]], side="left"
        )
        values = group.double_col.values
        sums = [values[lo:hi].sum() for lo, hi in bounds.T]
        return pd.Series(sums, index=group.index)

    # "foll" is id + 1 so that a single side="left" search also gives the
    # exclusive upper bound, avoiding a second searchsorted call
    # (presumably ids are integers -- TODO confirm).
    with_bounds = df.assign(
        prec=lambda t: t.id - preceding, foll=lambda t: t.id + 1
    )
    grouped = with_bounds.sort_values("id").groupby("string_col")
    per_row_sums = grouped.apply(gb_fn).droplevel(0)

    # Attach the reference sums to the original frame; assignment aligns
    # on the row index preserved by gb_fn.
    expected = df.assign(val=per_row_sums).set_index('id').sort_index()

    backend.assert_series_equal(result.val, expected.val)
def test_combine_windows(alltypes):
    """Exercise ``combine`` on window specifications.

    Combining a grouping/ordering window with a bounds-only window must
    equal a single window carrying all of those settings; combining
    again must accumulate the group and order keys in order. Combining
    an ``ibis.window`` with an ``ibis.range_window`` must raise
    ``IbisInputError``.
    """
    table = alltypes
    keyed = ibis.window(group_by=table.g, order_by=table.f)
    bounded = ibis.window(preceding=5, following=5)

    # Keys from one window plus frame bounds from the other.
    keyed_and_bounded = keyed.combine(bounded)
    assert_equal(
        keyed_and_bounded,
        ibis.window(
            group_by=table.g, order_by=table.f, preceding=5, following=5
        ),
    )

    # A second combine appends the new group/order keys after the old ones.
    more_keys = ibis.window(group_by=table.a, order_by=table.e)
    accumulated = keyed_and_bounded.combine(more_keys)
    assert_equal(
        accumulated,
        ibis.window(
            group_by=[table.g, table.a],
            order_by=[table.f, table.e],
            preceding=5,
            following=5,
        ),
    )

    # Windows of different kinds cannot be combined.
    range_bounded = ibis.range_window(preceding=5, following=5)
    with pytest.raises(ibis.common.IbisInputError):
        keyed.combine(range_bounded)
def test_combine_windows(alltypes):
    """Check that window specs merge correctly through ``combine``.

    A window with group/order keys combined with a window holding only
    frame bounds must match one window built with every setting; a
    further combine must extend the key lists. Mixing ``ibis.window``
    with ``ibis.range_window`` is expected to raise ``IbisInputError``.
    """
    table = alltypes
    partitioned = ibis.window(group_by=table.g, order_by=table.f)
    framed = ibis.window(preceding=5, following=5)

    # Merge the partition/order keys with the frame bounds.
    merged = partitioned.combine(framed)
    want = ibis.window(
        group_by=table.g, order_by=table.f, preceding=5, following=5
    )
    assert_equal(merged, want)

    # Merging again extends both key lists, keeping the earlier keys first.
    extra_keys = ibis.window(group_by=table.a, order_by=table.e)
    merged_twice = merged.combine(extra_keys)
    want = ibis.window(
        group_by=[table.g, table.a],
        order_by=[table.f, table.e],
        preceding=5,
        following=5,
    )
    assert_equal(merged_twice, want)

    # Combining windows of varying types is rejected.
    range_framed = ibis.range_window(preceding=5, following=5)
    with pytest.raises(ibis.common.exceptions.IbisInputError):
        partitioned.combine(range_framed)
Exemple #6
0
def test_grouped_bounded_range_window(backend, alltypes, df, con):
    """Compare a grouped, bounded range-window sum with a row-wise reference.

    The test is skipped when ``backend.supports_window_operations`` is
    falsy. Otherwise the ibis result and a reference computed with
    ``DataFrame.apply`` are both indexed and sorted by ``id`` and their
    ``val`` columns are compared. ``con`` is requested but not referenced
    in the body.
    """
    if not backend.supports_window_operations:
        pytest.skip(
            'Backend {} does not support window operations'.format(backend))

    # How to read the range window spec below:
    #
    # `preceding=10, following=0, order_by='id'`:
    #     For a row whose `id` is x, the window holds every row whose
    #     `id` value y satisfies x-10 <= y <= x.
    # `group_by='string_col'`:
    #     Only rows sharing the current row's 'string_col' value are
    #     eligible for the window.
    #
    range_spec = ibis.range_window(
        group_by='string_col',
        order_by='id',
        preceding=10,
        following=0,
    )
    windowed_sum = alltypes.double_col.sum().over(range_spec)
    executed = alltypes.mutate(val=windowed_sum).execute()
    result = executed.set_index('id').sort_index()

    def reference_sum(row):
        # Reproduce the range window spec with boolean masks over df.
        same_group = df.string_col == row.string_col  # group_by='string_col'
        above_lower = (row.id - 10) <= df.id  # matches `preceding=10`
        below_upper = df.id <= row.id  # matches `following=0`
        return df.double_col[same_group & above_lower & below_upper].sum()

    reference = df.assign(val=df.apply(reference_sum, axis=1))
    expected = reference.set_index('id').sort_index()

    backend.assert_series_equal(result.val, expected.val)
Exemple #7
0
                group_by='key',
            ),
            (-3600, 0),
        ),
        param(
            ibis.trailing_window(
                preceding=ibis.interval(hours=2),
                order_by='time',
                group_by='key',
            ),
            (-7200, 0),
        ),
        param(
            ibis.range_window(
                preceding=0,
                following=ibis.interval(hours=1),
                order_by='time',
                group_by='key',
            ),
            (0, 3600),
        ),
    ],
)
def test_time_indexed_window(client, ibis_window, spark_range):
    table = client.table('time_indexed_table')
    result = table.mutate(
        mean=table['value'].mean().over(ibis_window)).compile()
    result_pd = result.toPandas()
    spark_table = table.compile()
    spark_window = (Window.partitionBy('key').orderBy(
        F.col('time').cast('long')).rangeBetween(*spark_range))
    expected = spark_table.withColumn(