Example #1
0
def test_gc():
    """Check that replaced derived frames do not pile up on the source stream.

    The same two-column StreamingDataFrame is built from ``sdf`` four times,
    rebinding ``a`` each time, with a 0.1 second sleep before every rebuild.
    The number of downstreams on ``sdf.stream`` must still equal the count
    recorded after the first build, and must be zero once ``a`` has been
    deleted and a garbage collection has run.

    NOTE(review): this is a generator that yields ``gen.sleep`` results, so it
    is presumably driven by a coroutine test decorator outside this view.
    """
    import gc

    sdf = sd.Random(freq='5ms', interval='100ms')
    a = StreamingDataFrame({
        'volatility': sdf.x.rolling('100ms').var(),
        'sub': sdf.x - sdf.x.rolling('100ms').mean()
    })
    baseline = len(sdf.stream.downstreams)

    # Rebuild three more times; every rebinding drops the previous frame.
    for _ in range(3):
        yield gen.sleep(0.1)
        a = StreamingDataFrame({
            'volatility': sdf.x.rolling('100ms').var(),
            'sub': sdf.x - sdf.x.rolling('100ms').mean()
        })

    assert len(sdf.stream.downstreams) == baseline

    # With the last frame gone nothing should remain attached to the source.
    del a
    gc.collect()
    assert len(sdf.stream.downstreams) == 0
Example #2
0
def test_to_frame(stream):
    """Check ``to_frame`` on a streaming frame and on one of its columns.

    Calling ``to_frame`` on the frame must return the very same object;
    calling it on column ``x`` must give a StreamingDataFrame whose only
    column is ``'x'``.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=example, stream=stream)

    # A frame converts to itself (identity, not merely equality).
    assert sdf.to_frame() is sdf

    column_frame = sdf.x.to_frame()
    assert isinstance(column_frame, StreamingDataFrame)
    assert list(column_frame.columns) == ['x']
Example #3
0
def test_pair_arithmetic(stream):
    """Check arithmetic between two columns of the same streaming frame.

    ``(a.x + a.y) * 2`` is collected while the data is emitted in two halves;
    two results are expected and their concatenation must match the same
    expression evaluated on the full pandas frame.
    """
    df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})

    a = StreamingDataFrame(example=df.iloc[:0], stream=stream)
    doubled_sum = (a.x + a.y) * 2
    collected = doubled_sum.stream.gather().sink_to_list()

    for chunk in (df.iloc[:5], df.iloc[5:]):
        a.emit(chunk)

    assert len(collected) == 2
    assert_eq(pd.concat(collected, axis=0), (df.x + df.y) * 2)
Example #4
0
def test_index(stream):
    """Check arithmetic on the index of a streaming frame.

    ``a.index + 5`` is collected while the same frame is emitted twice; the
    first two results must each equal ``df.index + 5``.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    a = StreamingDataFrame(example=df, stream=stream)
    shifted_index = a.index + 5
    collected = shifted_index.stream.gather().sink_to_list()

    for _ in range(2):
        a.emit(df)

    for position in (0, 1):
        assert_eq(collected[position], df.index + 5)
Example #5
0
def test_binary_stream_operators(stream):
    """Check that adding two streaming columns matches the pandas result.

    The frame is emitted once and the first collected value of ``a.x + a.y``
    must equal ``df.x + df.y``.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    a = StreamingDataFrame(example=df, stream=stream)

    column_sum = a.x + a.y
    collected = column_sum.stream.gather().sink_to_list()

    a.emit(df)

    assert_eq(collected[0], df.x + df.y)
Example #6
0
def test_tail(stream):
    """Check ``tail(2)`` on a streaming frame.

    The same frame is emitted twice and each of the first two collected
    results must equal ``df.tail(2)``.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=df, stream=stream)

    last_two = sdf.tail(2)
    collected = last_two.stream.gather().sink_to_list()

    for _ in range(2):
        sdf.emit(df)

    for position in (0, 1):
        assert_eq(collected[position], df.tail(2))
Example #7
0
def test_getitem(stream):
    """Check boolean-mask selection on a streaming frame.

    ``a[a.x > 4]`` is collected while the data is emitted in two halves; two
    results are expected and their concatenation must equal ``df[df.x > 4]``.
    """
    df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})

    a = StreamingDataFrame(example=df.iloc[:0], stream=stream)
    selected = a[a.x > 4]
    collected = selected.stream.gather().sink_to_list()

    for chunk in (df.iloc[:5], df.iloc[5:]):
        a.emit(chunk)

    assert len(collected) == 2
    assert_eq(pd.concat(collected, axis=0), df[df.x > 4])
Example #8
0
def test_unary_operators(op, getter):
    """Check a unary operator on a streaming object against pandas.

    ``op`` is applied to ``getter(df)`` on the plain pandas frame first; if
    that raises, the test returns without checking anything.  Otherwise the
    same operator is applied to ``getter`` of the streaming frame, the frame
    is emitted once, and the first collected value must equal the pandas
    result.

    NOTE(review): ``op`` and ``getter`` are presumably supplied by a
    parametrize decorator outside this view.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    try:
        expected = op(getter(df))
    except Exception:
        # The operation fails on the pandas object itself: nothing to compare.
        return

    a = StreamingDataFrame(example=df)
    streaming_result = op(getter(a))
    collected = streaming_result.stream.sink_to_list()

    a.emit(df)

    assert_eq(collected[0], expected)
Example #9
0
def test_dtype(stream):
    """Check that dtype information mirrors the example frame.

    The string form of ``dtypes`` on the frame, and the ``dtype`` of column
    ``x`` and of the index, must match those of the pandas example.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=example, stream=stream)

    # Frame-level dtypes are compared through their string rendering.
    assert str(sdf.dtypes) == str(example.dtypes)

    assert sdf.x.dtype == example.x.dtype
    assert sdf.index.dtype == example.index.dtype
Example #10
0
def test_binary_operators(op, getter, stream):
    """Check a binary operator with the scalar 2 on either side.

    ``op(getter(df), 2)`` and ``op(2, getter(df))`` are evaluated on the plain
    pandas frame first; if either raises, the test returns without checking
    anything.  Otherwise both forms are applied to the streaming frame, the
    frame is emitted once, and the first collected value of each form must
    equal its pandas counterpart.

    NOTE(review): ``op`` and ``getter`` are presumably supplied by a
    parametrize decorator outside this view.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    try:
        expected_left = op(getter(df), 2)
        expected_right = op(2, getter(df))
    except Exception:
        # The operation fails on the pandas object itself: nothing to compare.
        return

    a = StreamingDataFrame(example=df, stream=stream)
    left_out = op(getter(a), 2).stream.gather().sink_to_list()
    right_out = op(2, getter(a)).stream.gather().sink_to_list()

    a.emit(df)

    assert_eq(left_out[0], expected_left)
    assert_eq(right_out[0], expected_right)
Example #11
0
def test_example_type_error_message():
    """Check the error raised for an ``example`` of the wrong type.

    Constructing a StreamingDataFrame with ``example=[123]`` must raise, and
    the message must mention ``'StreamingDataFrame'``, ``'DataFrame'`` and the
    offending value ``'[123]'``.

    Raises:
        AssertionError: if no exception is raised at all, or if the message
            lacks any of the expected fragments.
    """
    try:
        StreamingDataFrame(example=[123])
    except Exception as e:
        message = str(e)
    else:
        # Without this branch the test would pass vacuously whenever the
        # constructor accepts the bad example silently.
        raise AssertionError(
            'StreamingDataFrame(example=[123]) did not raise')

    assert 'StreamingDataFrame' in message
    assert 'DataFrame' in message
    assert '[123]' in message
Example #12
0
def test_rolling_count_aggregations(op, window, m, pre_get, post_get, kwargs):
    """Check a rolling aggregation on a streaming frame against pandas.

    An hourly-indexed frame covering 2000-01-01 through 2000-01-03 is
    aggregated with pandas to get the expected result.  The same pipeline is
    built on a streaming frame, which is then fed in slices of ``m`` rows.
    More than one result must be collected, and their concatenation must
    equal the pandas result.

    Args:
        op: name of the aggregation method called on the rolling object.
        window: argument passed to ``rolling``.
        m: number of rows emitted per slice.
        pre_get: callable applied to the frame before ``rolling``.
        post_get: callable applied to the rolling object before ``op``.
        kwargs: keyword arguments passed to the aggregation method.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator outside this view.
    """
    # pd.date_range replaces the DatetimeIndex(start=..., end=..., freq=...)
    # constructor form, which recent pandas versions no longer accept.
    index = pd.date_range(start='2000-01-01', end='2000-01-03', freq='1h')
    df = pd.DataFrame({'x': np.arange(len(index))}, index=index)

    expected = getattr(post_get(pre_get(df).rolling(window)), op)(**kwargs)

    sdf = StreamingDataFrame(example=df.iloc[:0])
    roll = getattr(post_get(pre_get(sdf).rolling(window)), op)(**kwargs)
    L = roll.stream.sink_to_list()
    # Nothing has been emitted yet, so nothing may have been collected.
    assert len(L) == 0

    for i in range(0, len(df), m):
        sdf.emit(df.iloc[i:i + m])

    assert len(L) > 1

    assert_eq(pd.concat(L), expected)
Example #13
0
def test_sum(stream):
    """Check ``sum`` on a streaming frame and on a single column.

    The same frame is emitted twice.  The test expects the frame-level result
    to be ``df.sum()`` after the first emit and twice that after the second;
    for column ``x`` (with 1 added to the streaming sum) it expects the
    matching scalar values.
    """
    template = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=template, stream=stream)

    df_out = sdf.sum().stream.gather().sink_to_list()
    x_out = (sdf.x.sum() + 1).stream.gather().sink_to_list()

    # A second frame with the same contents is what actually gets emitted.
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    for _ in range(2):
        sdf.emit(df)

    assert assert_eq(df_out[0], df.sum())
    assert assert_eq(df_out[1], df.sum() + df.sum())

    x_total = df.x.sum()
    assert x_out[0] == x_total + 1
    assert x_out[1] == x_total + x_total + 1
Example #14
0
def test_groupby_aggregate(agg, grouper, indexer):
    """Check a groupby aggregation on a streaming frame against pandas.

    The data is emitted in three slices; the last collected result must equal
    the same groupby aggregation evaluated on the full pandas frame.

    Args:
        agg: name of the aggregation method called on the grouped object.
        grouper: callable producing the groupby key from a frame.
        indexer: callable applied to the grouped object before aggregating.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator outside this view.
    """
    df = pd.DataFrame({
        'x': (np.arange(10) // 2).astype(float),
        'y': [1.0] * 10
    })

    a = StreamingDataFrame(example=df.iloc[:0])

    streaming_groups = indexer(a.groupby(grouper(a)))
    collected = getattr(streaming_groups, agg)().stream.sink_to_list()

    for chunk in (df.iloc[:3], df.iloc[3:7], df.iloc[7:]):
        a.emit(chunk)

    pandas_groups = indexer(df.groupby(grouper(df)))
    assert assert_eq(collected[-1], getattr(pandas_groups, agg)())
Example #15
0
def test_attributes():
    """Check column names are exposed as attributes of a streaming frame.

    ``'x'`` must be listed by ``dir`` and ``'z'`` must not; accessing ``x``
    must succeed while accessing ``z`` must raise AttributeError.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=example)

    listed = dir(sdf)
    assert 'x' in listed
    assert 'z' not in listed

    # An existing column resolves; a missing one raises.
    sdf.x
    with pytest.raises(AttributeError):
        sdf.z
Example #16
0
def test_display(stream):
    """Check that ``_ipython_display_`` can be called on a streaming sum.

    The test is skipped unless both ``ipywidgets`` and ``IPython`` can be
    imported.
    """
    for module_name in ('ipywidgets', 'IPython'):
        pytest.importorskip(module_name)

    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=example, stream=stream)

    column_total = sdf.x.sum()

    # Only checks that the call completes without raising.
    column_total._ipython_display_()
Example #17
0
def test_mean(stream):
    """Check ``mean`` on a streaming frame and on a single column.

    The frame-level mean must be a StreamingSeries.  The same frame is then
    emitted twice, and the test expects both the frame-level and the column
    results to equal the pandas mean after each emit.
    """
    template = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=template, stream=stream)

    frame_mean = sdf.mean()
    assert isinstance(frame_mean, StreamingSeries)
    df_out = frame_mean.stream.gather().sink_to_list()

    x_out = sdf.x.mean().stream.gather().sink_to_list()

    # A second frame with the same contents is what actually gets emitted.
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    for _ in range(2):
        sdf.emit(df)

    assert assert_eq(df_out[0], df.mean())
    assert assert_eq(df_out[1], df.mean())

    x_mean = df.x.mean()
    assert x_out[0] == x_mean
    assert x_out[1] == x_mean
Example #18
0
def test_repr_html(stream):
    """Check the HTML representation of streaming objects.

    For the frame, its ``y`` column and the mean of that column, the output
    of ``_repr_html_`` must contain the object's type name and the
    character ``'1'``.
    """
    df = pd.DataFrame({
        'x': (np.arange(10) // 2).astype(float),
        'y': [1.0] * 10
    })
    a = StreamingDataFrame(example=df, stream=stream)

    subjects = [a, a.y, a.y.mean()]
    for subject in subjects:
        rendered = subject._repr_html_()
        assert type(subject).__name__ in rendered
        assert '1' in rendered
Example #19
0
def test_setitem(stream):
    """Check column assignment on a streaming frame.

    A derived column, a constant column and a two-column assignment are set
    on the streaming frame.  Data is then pushed through the frame's original
    stream in three slices.  After the same assignments are applied to the
    pandas frame, the last collected mean must equal ``df.mean()``.
    """
    df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})

    sdf = StreamingDataFrame(example=df.iloc[:0], stream=stream)
    # Keep a handle on the stream before any assignment is made; data is
    # emitted through this handle rather than through ``sdf``.
    source = sdf.stream

    sdf['z'] = sdf['x'] * 2
    sdf['a'] = 10
    sdf[['c', 'd']] = sdf[['x', 'y']]

    collected = sdf.mean().stream.gather().sink_to_list()

    for chunk in (df.iloc[:3], df.iloc[3:7], df.iloc[7:]):
        source.emit(chunk)

    # Mirror the assignments on the pandas side to build the expectation.
    df['z'] = df['x'] * 2
    df['a'] = 10
    df[['c', 'd']] = df[['x', 'y']]

    assert_eq(collected[-1], df.mean())
Example #20
0
def test_instantiate_with_dict(stream):
    """Check building a streaming frame from a dict of streaming columns.

    The new frame must report three columns.  After the source frame is
    emitted twice, two results must have been collected, and columns ``a``,
    ``b``, ``c`` of each must equal the matching pandas expressions.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=df, stream=stream)

    sdf2 = StreamingDataFrame({'a': sdf.x, 'b': sdf.x * 2, 'c': sdf.y % 2})
    collected = sdf2.stream.gather().sink_to_list()
    assert len(sdf2.columns) == 3

    for _ in range(2):
        sdf.emit(df)

    assert len(collected) == 2

    column_order = ['a', 'b', 'c']
    expected = pd.DataFrame(
        {'a': df.x, 'b': df.x * 2, 'c': df.y % 2},
        columns=column_order)
    for result in collected:
        assert_eq(result[['a', 'b', 'c']], expected)
Example #21
0
def test_exceptions(stream):
    """Check that invalid values passed to ``emit`` are rejected.

    Emitting the integer ``1`` must raise TypeError and emitting an empty
    ``pd.DataFrame()`` must raise IndexError.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=example, stream=stream)

    rejected = (
        (TypeError, 1),
        (IndexError, pd.DataFrame()),
    )
    for expected_error, payload in rejected:
        with pytest.raises(expected_error):
            sdf.emit(payload)
Example #22
0
def test_repr(stream):
    """Check the text representation of streaming objects.

    The repr of the frame must contain its type name and both column names;
    the repr of column ``x`` must contain its type name and ``'x'``; the repr
    of the sum of ``x`` must contain its type name.
    """
    df = pd.DataFrame({
        'x': (np.arange(10) // 2).astype(float),
        'y': [1.0] * 10
    })
    a = StreamingDataFrame(example=df, stream=stream)

    frame_text = repr(a)
    assert type(a).__name__ in frame_text
    for column in ('x', 'y'):
        assert column in frame_text

    column_text = repr(a.x)
    assert type(a.x).__name__ in column_text
    assert 'x' in column_text

    total_text = repr(a.x.sum())
    assert type(a.x.sum()).__name__ in total_text
Example #23
0
def test_cumulative_aggregations(op, getter, stream):
    """Check a cumulative aggregation on a streaming object against pandas.

    The data is emitted in slices of three rows, followed by an empty slice.
    More than one result must be collected, and their concatenation must
    equal the aggregation evaluated on the full pandas object.

    Args:
        op: name of the aggregation method to call.
        getter: callable selecting the object to aggregate from a frame.
        stream: stream the streaming frame is built on.

    NOTE(review): ``op`` and ``getter`` are presumably supplied by a
    parametrize decorator outside this view.
    """
    df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})
    expected = getattr(getter(df), op)()

    sdf = StreamingDataFrame(example=df, stream=stream)

    aggregated = getattr(getter(sdf), op)()
    collected = aggregated.stream.gather().sink_to_list()

    step = 3
    for start in range(0, 10, step):
        sdf.emit(df.iloc[start:start + step])
    # Finish with a zero-row slice of the same frame.
    sdf.emit(df.iloc[:0])

    assert len(collected) > 1

    assert_eq(pd.concat(collected), expected)
Example #24
0
def test_identity(stream):
    """Check that emitted data passes through unchanged.

    The first value collected from the frame's stream must be the emitted
    object itself, and the example must keep columns ``['x', 'y']``.  Column
    ``x`` must be a StreamingSeries; its sink starts empty and, after another
    emit, holds a pandas Series equal to ``df.x``.
    """
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = StreamingDataFrame(example=df, stream=stream)
    frame_sink = sdf.stream.gather().sink_to_list()

    sdf.emit(df)

    # Identity, not equality: the very object that was emitted comes out.
    assert frame_sink[0] is df
    assert list(sdf.example.columns) == ['x', 'y']

    x = sdf.x
    assert isinstance(x, StreamingSeries)
    column_sink = x.stream.gather().sink_to_list()
    # The column sink was attached after the first emit, so it is empty.
    assert not column_sink

    sdf.emit(df)
    first_column = column_sink[0]
    assert isinstance(first_column, pd.Series)
    assert assert_eq(first_column, df.x)