def test_exceptions():
    """Check the errors raised when invalid objects are emitted.

    Emitting an integer is expected to raise ``TypeError``; emitting an
    empty ``DataFrame`` into a frame declared with columns ``x`` and ``y``
    is expected to raise ``IndexError``.
    """
    stream_frame = StreamingDataFrame(columns=['x', 'y'])

    # A plain integer is not a frame at all.
    with pytest.raises(TypeError):
        stream_frame.emit(1)

    # A frame without any columns.
    empty_frame = pd.DataFrame()
    with pytest.raises(IndexError):
        stream_frame.emit(empty_frame)
def test_pair_arithmetic():
    """Compare ``(x + y) * 2`` on a streaming frame with the pandas result.

    The input frame is emitted in two halves. Exactly two outputs are
    expected, and their concatenation must equal the same expression
    evaluated on the whole pandas frame.
    """
    frame = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})
    source = StreamingDataFrame(example=frame.iloc[0])

    doubled_sum = (source.x + source.y) * 2
    collected = doubled_sum.sink_to_list()

    for chunk in (frame.iloc[:5], frame.iloc[5:]):
        source.emit(chunk)

    # One output per emitted chunk.
    assert len(collected) == 2

    combined = pd.concat(collected, axis=0)
    expected = (frame.x + frame.y) * 2
    assert_eq(combined, expected)
def test_arithmetic():
    """Check scalar arithmetic on a streaming frame and on one of its columns.

    ``source + 1`` is sunk to one list and ``(source + 1).x * 10`` to
    another; after a single emit the first element of each list is compared
    with the equivalent pandas computation.
    """
    source = StreamingDataFrame(columns=['x', 'y'])

    plus_one = source + 1
    plus_one_results = plus_one.stream.sink_to_list()

    scaled_x = plus_one.x * 10
    scaled_x_results = scaled_x.stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source.emit(frame)

    expected_frame = frame + 1
    assert assert_eq(plus_one_results[0], expected_frame)
    assert assert_eq(scaled_x_results[0], expected_frame.x * 10)
def test_sum():
    """Check that ``sum`` accumulates across emitted batches.

    The same frame is emitted twice. The first output is expected to equal
    the batch sum and the second to equal twice the batch sum, both for the
    whole frame and for column ``x`` (the latter with ``+ 1`` applied to the
    streaming sum).
    """
    source = StreamingDataFrame(columns=['x', 'y'])
    frame_sums = source.sum().stream.sink_to_list()

    x_column = source.x
    x_sums = (x_column.sum() + 1).stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    for _ in range(2):
        source.emit(frame)

    batch_sum = frame.sum()
    assert assert_eq(frame_sums[0], batch_sum)
    assert assert_eq(frame_sums[1], batch_sum + batch_sum)

    x_batch_sum = frame.x.sum()
    assert x_sums[0] == x_batch_sum + 1
    assert x_sums[1] == x_batch_sum + x_batch_sum + 1
def test_mean():
    """Check ``mean`` on a streaming frame and on a single column.

    ``mean()`` on the frame must return a ``StreamingSeries``. The same
    frame is emitted twice, and every output is expected to equal the mean
    of that one frame.
    """
    source = StreamingDataFrame(columns=['x', 'y'])

    frame_mean = source.mean()
    assert isinstance(frame_mean, StreamingSeries)
    frame_means = frame_mean.stream.sink_to_list()

    x_column = source.x
    x_means = x_column.mean().stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    for _ in range(2):
        source.emit(frame)

    # Both batches are identical, so each output is compared to the same mean.
    expected_frame_mean = frame.mean()
    assert assert_eq(frame_means[0], expected_frame_mean)
    assert assert_eq(frame_means[1], expected_frame_mean)

    expected_x_mean = frame.x.mean()
    assert x_means[0] == expected_x_mean
    assert x_means[1] == expected_x_mean
def test_identity():
    """Check pass-through of emitted frames and column selection.

    The object sunk from the frame's stream must be the very same object
    that was emitted, the example must expose columns ``x`` and ``y``, and
    attribute access ``.x`` must give a ``StreamingSeries`` whose outputs
    are pandas ``Series`` equal to the emitted frame's ``x`` column.
    """
    source = StreamingDataFrame(columns=['x', 'y'])
    frames_seen = source.stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source.emit(frame)

    # Identity, not just equality: the emitted object is passed through.
    assert frames_seen[0] is frame
    assert list(source.example.columns) == ['x', 'y']

    x_column = source.x
    assert isinstance(x_column, StreamingSeries)

    series_seen = x_column.stream.sink_to_list()
    # This sink was attached after the first emit, so it starts out empty.
    assert not series_seen

    source.emit(frame)
    first_series = series_seen[0]
    assert isinstance(first_series, pd.Series)
    assert assert_eq(first_series, frame.x)
def test_rolling():
    """Check a count-based rolling window of size 3 with ``min_periods=2``.

    Ten single-row frames are emitted. Nine outputs are expected, each with
    at least two rows, and every output after the first must hold three
    consecutive rows.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source = StreamingDataFrame(example=example)

    windowed = source.rolling(3, min_periods=2)
    windows = windowed.stream.sink_to_list()

    for i in range(10):
        source.emit(pd.DataFrame({'x': [i], 'y': [i + 1]}))

    # NOTE(review): 10 emits give 9 outputs, presumably because the first
    # single-row window is below min_periods -- confirm against the library.
    assert len(windows) == 9
    for window in windows:
        assert len(window) >= 2

    # From the second output on, each window is expected to be full.
    for start, window in enumerate(windows[1:]):
        expected = pd.DataFrame({
            'x': list(range(start, start + 3)),
            'y': list(range(start + 1, start + 4)),
        })
        tm.assert_frame_equal(window.reset_index(drop=True), expected)
def test_rolling_time():
    """Check a time-based rolling window of ``'10ms'``.

    One hundred two-row frames indexed by the current wall-clock time are
    emitted about a millisecond apart. At least one output is expected, and
    from the fourth output on the index span of each window must lie
    strictly between 1ms and 10ms.
    """
    example = pd.DataFrame({'x': [1], 'y': [4]}, index=[pd.Timestamp.now()])
    source = StreamingDataFrame(example=example)
    windows = source.rolling('10ms').stream.sink_to_list()

    for i in range(100):
        values = {'x': [i, i + 0.5], 'y': [i, i + 0.5]}
        stamps = [pd.Timestamp.now(), pd.Timestamp.now()]
        source.emit(pd.DataFrame(values, index=stamps))
        time.sleep(0.001)

    assert windows

    lower = pd.Timedelta('1ms')
    upper = pd.Timedelta('10ms')
    # The first three outputs are skipped, as in the original check.
    for window in windows[3:]:
        span = window.index.max() - window.index.min()
        assert lower < span < upper
def test_groupby_aggregate(agg, grouper, indexer):
    """Compare a streaming groupby aggregation with the pandas equivalent.

    Parameters
    ----------
    agg
        Name of the aggregation method looked up with ``getattr`` on the
        (indexed) groupby object.
    grouper
        Callable given the frame and returning what is passed to
        ``groupby``.
    indexer
        Callable applied to the groupby object before aggregating.

    The arguments are supplied from outside this function (parametrization
    or fixtures not visible here). The frame is emitted in three chunks and
    the last streamed result must equal the aggregation of the full pandas
    frame.
    """
    def aggregate(obj):
        # Same groupby -> indexer -> aggregation chain for both backends.
        grouped = obj.groupby(grouper(obj))
        return getattr(indexer(grouped), agg)()

    frame = pd.DataFrame({
        'x': (np.arange(10) // 2).astype(float),
        'y': [1.0] * 10,
    })
    # A zero-row slice serves as the example.
    source = StreamingDataFrame(example=frame.iloc[:0])
    results = aggregate(source).stream.sink_to_list()

    for chunk in (frame.iloc[:3], frame.iloc[3:7], frame.iloc[7:]):
        source.emit(chunk)

    assert assert_eq(results[-1], aggregate(frame))