def test_gc(): sdf = sd.Random(freq='5ms', interval='100ms') a = StreamingDataFrame({ 'volatility': sdf.x.rolling('100ms').var(), 'sub': sdf.x - sdf.x.rolling('100ms').mean() }) n = len(sdf.stream.downstreams) yield gen.sleep(0.1) a = StreamingDataFrame({ 'volatility': sdf.x.rolling('100ms').var(), 'sub': sdf.x - sdf.x.rolling('100ms').mean() }) yield gen.sleep(0.1) a = StreamingDataFrame({ 'volatility': sdf.x.rolling('100ms').var(), 'sub': sdf.x - sdf.x.rolling('100ms').mean() }) yield gen.sleep(0.1) a = StreamingDataFrame({ 'volatility': sdf.x.rolling('100ms').var(), 'sub': sdf.x - sdf.x.rolling('100ms').mean() }) assert len(sdf.stream.downstreams) == n del a import gc gc.collect() assert len(sdf.stream.downstreams) == 0
def test_to_frame(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) assert sdf.to_frame() is sdf a = sdf.x.to_frame() assert isinstance(a, StreamingDataFrame) assert list(a.columns) == ['x']
def test_pair_arithmetic(stream): df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10}) a = StreamingDataFrame(example=df.iloc[:0], stream=stream) L = ((a.x + a.y) * 2).stream.gather().sink_to_list() a.emit(df.iloc[:5]) a.emit(df.iloc[5:]) assert len(L) == 2 assert_eq(pd.concat(L, axis=0), (df.x + df.y) * 2)
def test_index(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) a = StreamingDataFrame(example=df, stream=stream) b = a.index + 5 L = b.stream.gather().sink_to_list() a.emit(df) a.emit(df) assert_eq(L[0], df.index + 5) assert_eq(L[1], df.index + 5)
def test_binary_stream_operators(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) expected = df.x + df.y a = StreamingDataFrame(example=df, stream=stream) b = (a.x + a.y).stream.gather().sink_to_list() a.emit(df) assert_eq(b[0], expected)
def test_tail(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) L = sdf.tail(2).stream.gather().sink_to_list() sdf.emit(df) sdf.emit(df) assert_eq(L[0], df.tail(2)) assert_eq(L[1], df.tail(2))
def test_getitem(stream): df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10}) a = StreamingDataFrame(example=df.iloc[:0], stream=stream) L = a[a.x > 4].stream.gather().sink_to_list() a.emit(df.iloc[:5]) a.emit(df.iloc[5:]) assert len(L) == 2 assert_eq(pd.concat(L, axis=0), df[df.x > 4])
def test_unary_operators(op, getter): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) try: expected = op(getter(df)) except Exception: return a = StreamingDataFrame(example=df) b = op(getter(a)).stream.sink_to_list() a.emit(df) assert_eq(b[0], expected)
def test_dtype(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) assert str(sdf.dtypes) == str(df.dtypes) assert sdf.x.dtype == df.x.dtype assert sdf.index.dtype == df.index.dtype
def test_binary_operators(op, getter, stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) try: left = op(getter(df), 2) right = op(2, getter(df)) except Exception: return a = StreamingDataFrame(example=df, stream=stream) l = op(getter(a), 2).stream.gather().sink_to_list() r = op(2, getter(a)).stream.gather().sink_to_list() a.emit(df) assert_eq(l[0], left) assert_eq(r[0], right)
def test_example_type_error_message(): try: sdf = StreamingDataFrame(example=[123]) except Exception as e: assert 'StreamingDataFrame' in str(e) assert 'DataFrame' in str(e) assert '[123]' in str(e)
def test_rolling_count_aggregations(op, window, m, pre_get, post_get, kwargs): index = pd.DatetimeIndex(start='2000-01-01', end='2000-01-03', freq='1h') df = pd.DataFrame({'x': np.arange(len(index))}, index=index) expected = getattr(post_get(pre_get(df).rolling(window)), op)(**kwargs) sdf = StreamingDataFrame(example=df.iloc[:0]) roll = getattr(post_get(pre_get(sdf).rolling(window)), op)(**kwargs) L = roll.stream.sink_to_list() assert len(L) == 0 for i in range(0, len(df), m): sdf.emit(df.iloc[i:i + m]) assert len(L) > 1 assert_eq(pd.concat(L), expected)
def test_sum(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) df_out = sdf.sum().stream.gather().sink_to_list() x = sdf.x x_out = (x.sum() + 1).stream.gather().sink_to_list() df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf.emit(df) sdf.emit(df) assert assert_eq(df_out[0], df.sum()) assert assert_eq(df_out[1], df.sum() + df.sum()) assert x_out[0] == df.x.sum() + 1 assert x_out[1] == df.x.sum() + df.x.sum() + 1
def test_groupby_aggregate(agg, grouper, indexer): df = pd.DataFrame({ 'x': (np.arange(10) // 2).astype(float), 'y': [1.0] * 10 }) a = StreamingDataFrame(example=df.iloc[:0]) L = getattr(indexer(a.groupby(grouper(a))), agg)().stream.sink_to_list() a.emit(df.iloc[:3]) a.emit(df.iloc[3:7]) a.emit(df.iloc[7:]) assert assert_eq(L[-1], getattr(indexer(df.groupby(grouper(df))), agg)())
def test_attributes(): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df) assert 'x' in dir(sdf) assert 'z' not in dir(sdf) sdf.x with pytest.raises(AttributeError): sdf.z
def test_display(stream): pytest.importorskip('ipywidgets') pytest.importorskip('IPython') df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) s = sdf.x.sum() s._ipython_display_()
def test_mean(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) mean = sdf.mean() assert isinstance(mean, StreamingSeries) df_out = mean.stream.gather().sink_to_list() x = sdf.x x_out = x.mean().stream.gather().sink_to_list() df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf.emit(df) sdf.emit(df) assert assert_eq(df_out[0], df.mean()) assert assert_eq(df_out[1], df.mean()) assert x_out[0] == df.x.mean() assert x_out[1] == df.x.mean()
def test_repr_html(stream): df = pd.DataFrame({ 'x': (np.arange(10) // 2).astype(float), 'y': [1.0] * 10 }) a = StreamingDataFrame(example=df, stream=stream) for x in [a, a.y, a.y.mean()]: html = x._repr_html_() assert type(x).__name__ in html assert '1' in html
def test_setitem(stream): df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10}) sdf = StreamingDataFrame(example=df.iloc[:0], stream=stream) stream = sdf.stream sdf['z'] = sdf['x'] * 2 sdf['a'] = 10 sdf[['c', 'd']] = sdf[['x', 'y']] L = sdf.mean().stream.gather().sink_to_list() stream.emit(df.iloc[:3]) stream.emit(df.iloc[3:7]) stream.emit(df.iloc[7:]) df['z'] = df['x'] * 2 df['a'] = 10 df[['c', 'd']] = df[['x', 'y']] assert_eq(L[-1], df.mean())
def test_instantiate_with_dict(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) sdf2 = StreamingDataFrame({'a': sdf.x, 'b': sdf.x * 2, 'c': sdf.y % 2}) L = sdf2.stream.gather().sink_to_list() assert len(sdf2.columns) == 3 sdf.emit(df) sdf.emit(df) assert len(L) == 2 for x in L: assert_eq( x[['a', 'b', 'c']], pd.DataFrame({ 'a': df.x, 'b': df.x * 2, 'c': df.y % 2 }, columns=['a', 'b', 'c']))
def test_exceptions(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) with pytest.raises(TypeError): sdf.emit(1) with pytest.raises(IndexError): sdf.emit(pd.DataFrame())
def test_repr(stream): df = pd.DataFrame({ 'x': (np.arange(10) // 2).astype(float), 'y': [1.0] * 10 }) a = StreamingDataFrame(example=df, stream=stream) text = repr(a) assert type(a).__name__ in text assert 'x' in text assert 'y' in text text = repr(a.x) assert type(a.x).__name__ in text assert 'x' in text text = repr(a.x.sum()) assert type(a.x.sum()).__name__ in text
def test_cumulative_aggregations(op, getter, stream): df = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10}) expected = getattr(getter(df), op)() sdf = StreamingDataFrame(example=df, stream=stream) L = getattr(getter(sdf), op)().stream.gather().sink_to_list() for i in range(0, 10, 3): sdf.emit(df.iloc[i:i + 3]) sdf.emit(df.iloc[:0]) assert len(L) > 1 assert_eq(pd.concat(L), expected)
def test_identity(stream): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) sdf = StreamingDataFrame(example=df, stream=stream) L = sdf.stream.gather().sink_to_list() sdf.emit(df) assert L[0] is df assert list(sdf.example.columns) == ['x', 'y'] x = sdf.x assert isinstance(x, StreamingSeries) L2 = x.stream.gather().sink_to_list() assert not L2 sdf.emit(df) assert isinstance(L2[0], pd.Series) assert assert_eq(L2[0], df.x)