Ejemplo n.º 1
0
def test_exceptions(stream):
    """Check that invalid payloads passed to ``emit`` raise.

    An integer is expected to raise ``TypeError`` and an empty cudf frame
    is expected to raise ``IndexError``.
    """
    example = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    streaming = DataFrame(example=example, stream=stream)

    # A bare integer is not a frame at all.
    with pytest.raises(TypeError):
        streaming.emit(1)

    # An empty frame does not match the example.
    with pytest.raises(IndexError):
        streaming.emit(cudf.DataFrame())
Ejemplo n.º 2
0
def test_exceptions(stream):
    """Check that invalid payloads passed to ``emit`` raise.

    An integer is expected to raise ``TypeError`` and an empty pandas frame
    is expected to raise ``IndexError``.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    streaming = DataFrame(example=example, stream=stream)

    # A bare integer is not a frame at all.
    with pytest.raises(TypeError):
        streaming.emit(1)

    # An empty frame does not match the example.
    with pytest.raises(IndexError):
        streaming.emit(pd.DataFrame())
Ejemplo n.º 3
0
def test_dataframe_simple(func):
    """Compare ``func`` on a streaming frame with ``func`` on plain pandas.

    ``func`` is supplied by the test parametrization and is applied to both
    the pandas frame and the streaming wrapper.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    expected = func(data)

    source = DataFrame(example=data)
    transformed = func(source)
    results = transformed.stream.sink_to_list()

    source.emit(data)

    first_result = results[0]
    assert_eq(first_result, expected)
Ejemplo n.º 4
0
def test_getitem(stream):
    """Boolean-mask selection on a streaming cudf frame, fed in two batches.

    The concatenated outputs must equal the same selection applied to the
    whole frame.
    """
    full = cudf.DataFrame({"x": list(range(10)), "y": [1] * 10})

    source = DataFrame(example=full.iloc[:0], stream=stream)
    mask = source.x > 4
    batches = source[mask].stream.gather().sink_to_list()

    # Feed the first and second half of the rows as separate batches.
    for part in (slice(None, 5), slice(5, None)):
        source.emit(full.iloc[part])

    assert len(batches) == 2
    assert_eq(cudf.concat(batches), full[full.x > 4])
Ejemplo n.º 5
0
def test_ewm_mean():
    """Streaming ``ewm(1).mean()`` over one-row batches matches pandas."""
    sdf = DataFrame(example=pd.DataFrame(columns=['x', 'y']))
    collected = sdf.ewm(1).mean().stream.gather().sink_to_list()

    # Emit three single-row frames, one (x, y) pair at a time.
    for x_val, y_val in ((1., 2.), (2., 3.), (3., 4.)):
        sdf.emit(pd.DataFrame({'x': [x_val], 'y': [y_val]}))
    result = pd.concat(collected, ignore_index=True)

    reference = pd.DataFrame({'x': [1., 2., 3.], 'y': [2., 3., 4.]})
    expected = reference.ewm(1).mean()
    assert_eq(result, expected)
Ejemplo n.º 6
0
def test_index(stream):
    """Arithmetic on the streaming index is applied to each emitted batch."""
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source = DataFrame(example=data, stream=stream)
    shifted = source.index + 5
    seen = shifted.stream.gather().sink_to_list()

    # Emit the same frame twice.
    for _ in range(2):
        source.emit(data)

    for position in (0, 1):
        assert_eq(seen[position], data.index + 5)
Ejemplo n.º 7
0
def test_pair_arithmetic(stream):
    """Column-pair arithmetic ``(x + y) * 2`` on a stream fed in two batches.

    The concatenated outputs must equal the same expression evaluated on
    the whole pandas frame.
    """
    full = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})

    source = DataFrame(example=full.iloc[:0], stream=stream)
    expression = (source.x + source.y) * 2
    batches = expression.stream.gather().sink_to_list()

    # Feed the first and second half of the rows as separate batches.
    for part in (slice(None, 5), slice(5, None)):
        source.emit(full.iloc[part])

    assert len(batches) == 2
    combined = pd.concat(batches, axis=0)
    assert_eq(combined, (full.x + full.y) * 2)
Ejemplo n.º 8
0
def test_getitem(stream):
    """Boolean-mask selection on a streaming pandas frame, fed in two batches.

    The concatenated outputs must equal the same selection applied to the
    whole frame.
    """
    full = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})

    source = DataFrame(example=full.iloc[:0], stream=stream)
    mask = source.x > 4
    batches = source[mask].stream.gather().sink_to_list()

    # Feed the first and second half of the rows as separate batches.
    for part in (slice(None, 5), slice(5, None)):
        source.emit(full.iloc[part])

    assert len(batches) == 2
    combined = pd.concat(batches, axis=0)
    assert_eq(combined, full[full.x > 4])
Ejemplo n.º 9
0
def test_tail(stream):
    """``tail(2)`` on the stream yields the last two rows of each batch."""
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source = DataFrame(example=data, stream=stream)

    tails = source.tail(2).stream.gather().sink_to_list()

    # Emit the same frame twice.
    for _ in range(2):
        source.emit(data)

    for position in (0, 1):
        assert_eq(tails[position], data.tail(2))
Ejemplo n.º 10
0
def test_binary_stream_operators(stream):
    """Adding two streaming columns matches the pandas column sum."""
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    expected = data.x + data.y

    source = DataFrame(example=data, stream=stream)
    summed = source.x + source.y
    outputs = summed.stream.gather().sink_to_list()

    source.emit(data)

    assert_eq(outputs[0], expected)
Ejemplo n.º 11
0
def test_set_index():
    """``set_index`` works with a column name and with a derived series."""
    frame = cudf.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    source = DataFrame(example=frame)

    # Case 1: index by an existing column name.
    by_name = source.set_index('x')
    out = by_name.stream.sink_to_list()
    source.emit(frame)
    assert_eq(out[0], frame.set_index('x'))

    # Case 2: index by a series computed from the streaming frame.
    derived_index = source.y + 1
    out = source.set_index(derived_index).stream.sink_to_list()
    source.emit(frame)
    assert_eq(out[0], frame.set_index(frame.y + 1))
Ejemplo n.º 12
0
def test_expanding(func):
    """Streaming ``expanding()`` aggregation matches pandas on stacked data.

    ``func`` is supplied by the test parametrization and is applied to the
    expanding object of both the stream and the pandas reference.
    """
    row = pd.DataFrame({'x': [1.], 'y': [2.]})
    sdf = DataFrame(example=row)

    outputs = func(sdf.expanding()).stream.gather().sink_to_list()

    repeats = 5
    for _ in range(repeats):
        sdf.emit(row)

    # Each collected item becomes one row after the transpose.
    result = pd.concat(outputs, axis=1).T.astype(float)
    stacked = pd.concat([row] * repeats, ignore_index=True)
    expected = func(stacked.expanding())
    assert_eq(result, expected)
Ejemplo n.º 13
0
def test_unary_operators(op, getter):
    """Unary operator ``op`` on a streaming selection matches pandas.

    ``getter`` picks what the operator is applied to, on both the pandas
    frame and the streaming frame.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    try:
        expected = op(getter(data))
    except Exception:
        # The pandas reference raised, so there is nothing to compare
        # against; the test ends here without failing.
        return

    source = DataFrame(example=data)
    applied = op(getter(source))
    outputs = applied.stream.sink_to_list()

    source.emit(data)

    assert_eq(outputs[0], expected)
Ejemplo n.º 14
0
def test_index(stream):
    """Arithmetic on the streaming index is applied to each emitted batch.

    Waits until two results have been collected before asserting.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source = DataFrame(example=data, stream=stream)
    shifted = source.index + 5
    seen = shifted.stream.gather().sink_to_list()

    # Emit the same frame twice.
    for _ in range(2):
        source.emit(data)

    def both_arrived():
        return len(seen) > 1

    wait_for(both_arrived, timeout=2, period=0.05)

    for position in (0, 1):
        assert_eq(seen[position], data.index + 5)
Ejemplo n.º 15
0
def test_windowing_n(func, n, getter):
    """Row-count windows of size ``n`` aggregated by ``func``.

    The data is emitted in batches of three rows followed by one empty
    batch. The first and last outputs are compared with the same
    computation on the matching pandas slices.
    """
    data = pd.DataFrame({'x': list(range(10)), 'y': [1, 2] * 5})

    sdf = DataFrame(example=data)
    windowed = getter(sdf).window(n=n) + 10
    outputs = func(windowed).stream.gather().sink_to_list()

    step = 3
    for start in range(0, 10, step):
        sdf.emit(data.iloc[start: start + step])
    sdf.emit(data.iloc[:0])

    assert len(outputs) == 5

    # After the first batch the window holds at most the first three rows.
    first_window = getter(data).iloc[max(0, 3 - n): 3]
    assert_eq(outputs[0], func(first_window + 10))

    # After the final batch the window holds the last ``n`` rows.
    last_window = getter(data).iloc[len(data) - n:]
    assert_eq(outputs[-1], func(last_window + 10))
Ejemplo n.º 16
0
def test_custom_aggregation():
    """A user-defined ``Aggregation`` works on plain and windowed streams.

    The same frame is emitted three times to each stream and the collected
    outputs are compared with fixed expected lists.
    """
    data = pd.DataFrame({'x': np.arange(10, dtype=float),
                         'y': [1.0, 2.0] * 5})

    class Custom(Aggregation):
        # Counter-like aggregation: starts at 0, adds 1 in ``on_new`` and
        # subtracts 100 in ``on_old``. Each hook returns a pair of the
        # updated state and the state it was called with.
        def initial(self, new):
            return 0

        def on_new(self, state, new):
            updated = state + 1
            return updated, state

        def on_old(self, state, new):
            updated = state - 100
            return updated, state

    # Aggregation over the whole stream.
    sdf = DataFrame(example=data)
    outputs = sdf.aggregate(Custom()).stream.sink_to_list()

    for _ in range(3):
        sdf.emit(data)

    assert outputs == [0, 1, 2]

    # Same aggregation over a five-row window.
    sdf = DataFrame(example=data)
    outputs = sdf.window(n=5).aggregate(Custom()).stream.sink_to_list()

    for _ in range(3):
        sdf.emit(data)

    assert outputs == [1, -198, -397]
Ejemplo n.º 17
0
def test_reductions(stream, func):
    """Reduction ``func`` on a streaming frame and on one of its columns.

    After two emissions the latest output must equal ``func`` applied to
    the two frames concatenated.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = DataFrame(example=data, stream=stream)

    frame_results = func(sdf).stream.gather().sink_to_list()

    column = sdf.x
    column_results = func(column).stream.gather().sink_to_list()

    # Emit the same frame twice.
    for _ in range(2):
        sdf.emit(data)

    expected_frame = func(pd.concat([data, data]))
    assert_eq(frame_results[-1], expected_frame)
    expected_column = func(pd.concat([data, data]).x)
    assert_eq(column_results[-1], expected_column)
Ejemplo n.º 18
0
def test_reductions(stream, func):
    """Reduction ``func`` on a streaming cudf frame and one of its columns.

    Runs once with the full frame as the example and once with an empty
    slice of it. After two emissions the latest output must equal ``func``
    applied to the two frames concatenated.
    """
    data = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    candidates = [data, data.iloc[:0]]
    for example in candidates:
        sdf = DataFrame(example=example, stream=stream)

        frame_results = func(sdf).stream.gather().sink_to_list()

        column = sdf.x
        column_results = func(column).stream.gather().sink_to_list()

        # Emit the same frame twice.
        for _ in range(2):
            sdf.emit(data)

        expected_frame = func(cudf.concat([data, data]))
        assert_eq(frame_results[-1], expected_frame)
        expected_column = func(cudf.concat([data, data]).x)
        assert_eq(column_results[-1], expected_column)
Ejemplo n.º 19
0
def test_cumulative_aggregations(op, getter, stream):
    """Cumulative method named ``op`` on a stream matches pandas.

    The data is emitted in batches of three rows followed by one empty
    batch, and the concatenated outputs are compared with the pandas
    result on the whole frame.
    """
    data = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})
    reference_method = getattr(getter(data), op)
    expected = reference_method()

    sdf = DataFrame(example=data, stream=stream)

    streaming_method = getattr(getter(sdf), op)
    pieces = streaming_method().stream.gather().sink_to_list()

    step = 3
    for start in range(0, 10, step):
        sdf.emit(data.iloc[start: start + step])
    sdf.emit(data.iloc[:0])

    assert len(pieces) > 1

    assert_eq(pd.concat(pieces), expected)
Ejemplo n.º 20
0
def test_instantiate_with_dict(stream):
    """A streaming frame can be built from a dict of streaming series."""
    data = cudf.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = DataFrame(example=data, stream=stream)

    columns = {
        'a': sdf.x,
        'b': sdf.x * 2,
        'c': sdf.y % 2,
    }
    derived = DataFrame(columns)
    outputs = derived.stream.gather().sink_to_list()
    assert len(derived.columns) == 3

    # Emit the same frame twice.
    for _ in range(2):
        sdf.emit(data)

    assert len(outputs) == 2
    for batch in outputs:
        expected = cudf.DataFrame(
            {'a': data.x, 'b': data.x * 2, 'c': data.y % 2})
        assert_eq(batch[['a', 'b', 'c']], expected)
Ejemplo n.º 21
0
def test_binary_operators(op, getter, stream):
    """Binary operator ``op`` with the scalar 2 on either side.

    Both ``op(selection, 2)`` and ``op(2, selection)`` are checked against
    the pandas results.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    try:
        expected_left = op(getter(data), 2)
        expected_right = op(2, getter(data))
    except Exception:
        # The pandas reference raised, so there is nothing to compare
        # against; the test ends here without failing.
        return

    source = DataFrame(example=data, stream=stream)
    left_out = op(getter(source), 2).stream.gather().sink_to_list()
    right_out = op(2, getter(source)).stream.gather().sink_to_list()

    source.emit(data)

    assert_eq(left_out[0], expected_left)
    assert_eq(right_out[0], expected_right)
Ejemplo n.º 22
0
def test_rolling_count_aggregations(op, window, m, pre_get, post_get, kwargs,
                                    stream):
    """Rolling aggregation ``op`` on a stream matches pandas.

    Parameters come from the test parametrization:

    - ``op``: name of the rolling method to call.
    - ``window``: argument passed to ``rolling``.
    - ``m``: number of rows emitted per batch.
    - ``pre_get`` / ``post_get``: selections applied before and after
      ``rolling``.
    - ``kwargs``: keyword arguments forwarded to the rolling method.
    - ``stream``: stream fixture backing the streaming frame.
    """
    # Build the hourly index with ``pd.date_range``: the ``start``/``end``
    # keywords of the ``pd.DatetimeIndex`` constructor were removed from
    # pandas.
    index = pd.date_range('2000-01-01', '2000-01-03', freq='1h')
    df = pd.DataFrame({'x': np.arange(len(index))}, index=index)

    expected = getattr(post_get(pre_get(df).rolling(window)), op)(**kwargs)

    sdf = DataFrame(example=df.iloc[:0], stream=stream)
    roll = getattr(post_get(pre_get(sdf).rolling(window)), op)(**kwargs)
    L = roll.stream.gather().sink_to_list()
    # Nothing has been emitted yet, so nothing may have been collected.
    assert len(L) == 0

    # Feed the frame in consecutive batches of ``m`` rows.
    for i in range(0, len(df), m):
        sdf.emit(df.iloc[i: i + m])

    assert len(L) > 1

    assert_eq(pd.concat(L), expected)
Ejemplo n.º 23
0
def test_identity(stream):
    """An emitted frame passes through unchanged, and so does a column.

    Also checks the example's column names and the type of the streaming
    column accessor.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = DataFrame(example=data, stream=stream)
    frames_seen = sdf.stream.gather().sink_to_list()

    sdf.emit(data)

    # The very same object is expected to come out of the stream.
    assert frames_seen[0] is data
    assert list(sdf.example.columns) == ['x', 'y']

    column = sdf.x
    assert isinstance(column, Series)
    series_seen = column.stream.gather().sink_to_list()
    # The column sink was attached after the first emit.
    assert not series_seen

    sdf.emit(data)
    first_series = series_seen[0]
    assert isinstance(first_series, pd.Series)
    assert assert_eq(first_series, data.x)
Ejemplo n.º 24
0
def test_rolling_count_aggregations(op, window, m, pre_get, post_get, kwargs,
                                    stream):
    """Rolling aggregation ``op`` on a cudf-backed stream.

    The frame is emitted in batches of ``m`` rows and the concatenated
    outputs are compared with the same rolling computation on the whole
    frame.
    """
    hourly = pd.DatetimeIndex(
        pd.date_range("2000-01-01", "2000-01-03", freq="1h"))
    data = cudf.DataFrame({"x": np.arange(len(hourly))}, index=hourly)

    reference_roll = post_get(pre_get(data).rolling(window))
    expected = getattr(reference_roll, op)(**kwargs)

    sdf = DataFrame(example=data, stream=stream)
    streaming_roll = post_get(pre_get(sdf).rolling(window))
    rolled = getattr(streaming_roll, op)(**kwargs)
    pieces = rolled.stream.gather().sink_to_list()
    # Nothing has been emitted yet, so nothing may have been collected.
    assert len(pieces) == 0

    for start in range(0, len(data), m):
        sdf.emit(data.iloc[start:start + m])

    assert len(pieces) > 1

    assert_eq(cudf.concat(pieces), expected)
Ejemplo n.º 25
0
def test_groupby_windowing_n(func, n, getter, grouper, indexer):
    """Group-by aggregation over row-count windows of size ``n``.

    The data is emitted in batches of three rows followed by one empty
    batch. The first and last outputs are compared with the same group-by
    computation on the matching pandas slices.
    """
    data = pd.DataFrame({'x': np.arange(10, dtype=float),
                         'y': [1.0, 2.0] * 5})

    sdf = DataFrame(example=data)

    def aggregate_groups(obj):
        # Same pipeline for the streaming window and the pandas reference.
        grouped = obj.groupby(grouper(obj))
        return func(indexer(grouped))

    outputs = aggregate_groups(sdf.window(n=n)).stream.gather().sink_to_list()

    step = 3
    for start in range(0, 10, step):
        sdf.emit(data.iloc[start: start + step])
    sdf.emit(data.iloc[:0])

    assert len(outputs) == 5

    # After the first batch the window holds at most the first ``step`` rows.
    first_window = data.iloc[max(0, step - n): step]
    assert_eq(outputs[0], aggregate_groups(first_window))

    # After the final batch the window holds the last ``n`` rows.
    last_window = data.iloc[len(data) - n:]
    assert_eq(outputs[-1], aggregate_groups(last_window))
Ejemplo n.º 26
0
def test_groupby_windowing_value(func, value, getter, grouper, indexer):
    """Group-by aggregation over time-based windows of width ``value``.

    An hourly cudf frame is emitted in batches of 13 rows. The first and
    last outputs are compared with the same group-by computation on the
    matching slices of the frame.
    """
    hourly = pd.DatetimeIndex(
        pd.date_range("2000-01-01", "2000-01-03", freq="1h")
    )
    positions = np.arange(len(hourly), dtype=float)
    data = cudf.DataFrame(
        {
            "x": positions,
            "y": np.arange(len(hourly), dtype=float) % 2,
        },
        index=hourly,
    )

    span = pd.Timedelta(value)

    sdf = DataFrame(example=data)

    def aggregate_groups(obj):
        # Same pipeline for the streaming window and the reference frame.
        grouped = obj.groupby(grouper(obj))
        return func(indexer(grouped))

    outputs = aggregate_groups(sdf.window(span)).stream.gather().sink_to_list()

    step = 13
    for start in range(0, len(hourly), step):
        sdf.emit(data.iloc[start : start + step])

    assert len(outputs) == 4

    # Expected first window: drop as many leading rows of the first batch
    # as there are rows at or after ``min + span``.
    head = data.iloc[:step]
    overflow = head.loc[head.index.min() + span :]
    head = head.iloc[len(overflow) :]

    expected_first = aggregate_groups(head)
    assert_eq(outputs[0], expected_first)

    # Expected last window: rows from one second after ``max - span``.
    tail = data.loc[hourly.max() - span + pd.Timedelta("1s") :]
    expected_last = aggregate_groups(tail)
    assert_eq(outputs[-1], expected_last)
Ejemplo n.º 27
0
def test_window_sum(stream):
    """Sum of column ``x`` over a four-row window after each emit.

    The full list of collected sums is checked after every emission.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = DataFrame(example=data, stream=stream)
    sums = sdf.window(n=4).x.sum().stream.gather().sink_to_list()

    expected_so_far = []
    for total in (6, 9, 9):
        sdf.emit(data)
        expected_so_far.append(total)
        assert sums == expected_so_far
Ejemplo n.º 28
0
def test_window_full():
    """Contents of a four-row window after batches of 3, 5 and 2 rows."""
    data = pd.DataFrame({'x': np.arange(10, dtype=float),
                         'y': [1.0, 2.0] * 5})

    sdf = DataFrame(example=data)

    windowed = sdf.window(n=4).apply(lambda frame: frame)
    windows = windowed.stream.sink_to_list()

    for batch in (slice(None, 3), slice(3, 8), slice(8, None)):
        sdf.emit(data.iloc[batch])

    # Expected window contents after each of the three emits.
    expected_slices = (slice(None, 3), slice(4, 8), slice(-4, None))
    for position, rows in enumerate(expected_slices):
        assert_eq(windows[position], data.iloc[rows])
Ejemplo n.º 29
0
def test_window_sum_dataframe(stream):
    """Column sums over a four-row window after each emit.

    Every output collected so far is re-checked after each emission.
    """
    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = DataFrame(example=data, stream=stream)
    sums = sdf.window(n=4).sum().stream.gather().sink_to_list()

    # Expected (x, y) sums for the first, second and third output.
    totals = ([6, 15], [9, 21], [9, 21])
    for emitted in range(1, len(totals) + 1):
        sdf.emit(data)
        for position in range(emitted):
            expected = pd.Series(totals[position], index=['x', 'y'])
            assert_eq(sums[position], expected)
Ejemplo n.º 30
0
def test_window_sum_dataframe(stream):
    """Column sums over a four-row window of a cudf frame after each emit.

    Every output collected so far is re-checked after each emission.
    """
    data = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    sdf = DataFrame(example=data, stream=stream)
    sums = sdf.window(n=4).sum().stream.gather().sink_to_list()

    # Expected (x, y) sums for the first, second and third output.
    totals = ([6, 15], [9, 21], [9, 21])
    for emitted in range(1, len(totals) + 1):
        sdf.emit(data)
        for position in range(emitted):
            expected = cudf.Series(totals[position], index=["x", "y"])
            assert_eq(sums[position], expected)