Esempio n. 1
0
def test_exceptions():
    """Check that ``emit`` rejects payloads the frame cannot accept.

    The test expects a ``TypeError`` when a plain integer is emitted and an
    ``IndexError`` when an empty ``pd.DataFrame`` is emitted into a frame
    declared with columns ``x`` and ``y``.
    """
    frame = StreamingDataFrame(columns=['x', 'y'])

    # (expected exception, offending payload) pairs, checked in order.
    bad_payloads = (
        (TypeError, 1),
        (IndexError, pd.DataFrame()),
    )
    for expected_error, payload in bad_payloads:
        with pytest.raises(expected_error):
            frame.emit(payload)
Esempio n. 2
0
def test_pair_arithmetic():
    """Check element-wise arithmetic between two streaming columns.

    The expression ``(x + y) * 2`` is built on the streaming frame, the
    source data is emitted in two chunks, and the concatenated results are
    compared against the same expression evaluated on the full pandas frame.
    """
    frame = pd.DataFrame({'x': list(range(10)), 'y': [1] * 10})

    # NOTE(review): the example is a single row (``iloc[0]``) and the sink is
    # attached without going through ``.stream`` -- confirm this matches the
    # library version under test.
    source = StreamingDataFrame(example=frame.iloc[0])
    doubled_sum = (source.x + source.y) * 2
    results = doubled_sum.sink_to_list()

    # Feed the data as two consecutive halves.
    for chunk in (frame.iloc[:5], frame.iloc[5:]):
        source.emit(chunk)

    # One result per emitted chunk.
    assert len(results) == 2
    combined = pd.concat(results, axis=0)
    assert_eq(combined, (frame.x + frame.y) * 2)
Esempio n. 3
0
def test_arithmetic():
    """Check scalar arithmetic on a streaming frame and on one of its columns.

    ``source + 1`` is sunk to one list and ``(source + 1).x * 10`` to another;
    after a single emit, the first element of each list is compared with the
    equivalent pandas computation.
    """
    source = StreamingDataFrame(columns=['x', 'y'])

    shifted = source + 1
    shifted_results = shifted.stream.sink_to_list()

    scaled_x = shifted.x * 10
    scaled_results = scaled_x.stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source.emit(frame)

    first_shifted = shifted_results[0]
    first_scaled = scaled_results[0]
    assert assert_eq(first_shifted, frame + 1)
    assert assert_eq(first_scaled, (frame + 1).x * 10)
Esempio n. 4
0
def test_sum():
    """Check that ``sum`` accumulates across emitted batches.

    The same frame is emitted twice. The test expects the first result to
    equal the sum of one batch and the second to equal the sum of both, for
    the whole frame and for the ``x`` column (offset by 1).
    """
    source = StreamingDataFrame(columns=['x', 'y'])
    frame_sums = source.sum().stream.sink_to_list()

    column_x = source.x
    x_sums_plus_one = (column_x.sum() + 1).stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    # Emit the identical batch twice.
    for _ in range(2):
        source.emit(frame)

    # Frame-level running total after one and two batches.
    assert assert_eq(frame_sums[0], frame.sum())
    assert assert_eq(frame_sums[1], frame.sum() + frame.sum())

    # Column-level running total, shifted by the constant 1.
    assert x_sums_plus_one[0] == frame.x.sum() + 1
    assert x_sums_plus_one[1] == frame.x.sum() + frame.x.sum() + 1
Esempio n. 5
0
def test_mean():
    """Check that ``mean`` on a streaming frame and column tracks the data.

    The frame-level mean must be a ``StreamingSeries``. The same frame is
    emitted twice, so the test expects both results to equal the mean of a
    single batch, for the whole frame and for the ``x`` column.
    """
    source = StreamingDataFrame(columns=['x', 'y'])

    frame_mean = source.mean()
    assert isinstance(frame_mean, StreamingSeries)
    frame_means = frame_mean.stream.sink_to_list()

    column_x = source.x
    x_means = column_x.mean().stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    # Emit the identical batch twice.
    for _ in range(2):
        source.emit(frame)

    # Frame-level mean after the first and second batch.
    assert assert_eq(frame_means[0], frame.mean())
    assert assert_eq(frame_means[1], frame.mean())

    # Column-level mean after the first and second batch.
    assert x_means[0] == frame.x.mean()
    assert x_means[1] == frame.x.mean()
Esempio n. 6
0
def test_identity():
    """Check pass-through behaviour of a streaming frame and column access.

    An emitted frame must arrive in the sink as the very same object, the
    example must expose the declared columns, and a column selected after the
    first emit must only see data emitted afterwards.
    """
    source = StreamingDataFrame(columns=['x', 'y'])
    frames_seen = source.stream.sink_to_list()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    source.emit(frame)

    # The sink receives the identical object, not a copy.
    assert frames_seen[0] is frame
    assert list(source.example.columns) == ['x', 'y']

    column_x = source.x
    assert isinstance(column_x, StreamingSeries)
    x_seen = column_x.stream.sink_to_list()
    # The column sink was attached after the first emit, so it starts empty.
    assert not x_seen

    source.emit(frame)
    first_x = x_seen[0]
    assert isinstance(first_x, pd.Series)
    assert assert_eq(first_x, frame.x)
Esempio n. 7
0
def test_rolling():
    """Check a count-based rolling window of size 3 with ``min_periods=2``.

    Ten single-row frames are emitted. The test expects nine windows (none
    for the first row alone), each with at least two rows, and every window
    after the first to hold three consecutive rows.
    """
    example = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    source = StreamingDataFrame(example=example)
    windowed = source.rolling(3, min_periods=2)
    windows = windowed.stream.sink_to_list()

    # Emit rows (0, 1), (1, 2), ..., (9, 10) one at a time.
    for step in range(10):
        source.emit(pd.DataFrame({'x': [step], 'y': [step + 1]}))

    assert len(windows) == 9
    assert all(len(window) >= 2 for window in windows)

    # From the second window on, each window is a full three-row slice.
    for start, window in enumerate(windows[1:]):
        expected = pd.DataFrame({
            'x': list(range(start, start + 3)),
            'y': list(range(start + 1, start + 4)),
        })
        tm.assert_frame_equal(window.reset_index(drop=True), expected)
Esempio n. 8
0
def test_rolling_time():
    """Check a time-based rolling window of ``'10ms'``.

    One hundred two-row frames indexed by the current wall-clock time are
    emitted about a millisecond apart. After a short warm-up (the first three
    windows are skipped), every window must span more than 1ms and less than
    10ms.

    NOTE(review): the assertions depend on real elapsed time and on the
    granularity of ``time.sleep`` -- confirm this is stable on the CI hosts.
    """
    start = pd.Timestamp.now()
    example = pd.DataFrame({'x': [1], 'y': [4]}, index=[start])

    source = StreamingDataFrame(example=example)
    windows = source.rolling('10ms').stream.sink_to_list()

    for step in range(100):
        values = [step, step + 0.5]
        # Two separate clock reads, one per row, as in a live feed.
        stamps = [pd.Timestamp.now(), pd.Timestamp.now()]
        source.emit(pd.DataFrame({'x': values, 'y': values}, index=stamps))
        time.sleep(0.001)

    assert windows
    upper_bound = pd.Timedelta('10ms')
    lower_bound = pd.Timedelta('1ms')
    for window in windows[3:]:
        span = window.index.max() - window.index.min()
        assert span < upper_bound
        assert span > lower_bound
Esempio n. 9
0
def test_groupby_aggregate(agg, grouper, indexer):
    """Check that a streaming groupby aggregation matches pandas.

    The data is emitted in three chunks; the last streamed result is compared
    with the same groupby aggregation applied to the full pandas frame.

    Args:
        agg: Name of the aggregation method, looked up with ``getattr`` on
            the groupby object.
        grouper: Callable that takes a frame (streaming or pandas) and
            returns the key passed to ``groupby``.
        indexer: Callable applied to the groupby object before aggregating.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization that is not visible in this excerpt.
    """
    frame = pd.DataFrame({
        'x': (np.arange(10) // 2).astype(float),
        'y': [1.0] * 10,
    })

    # An empty slice serves as the example.
    source = StreamingDataFrame(example=frame.iloc[:0])

    streaming_groups = indexer(source.groupby(grouper(source)))
    streaming_agg = getattr(streaming_groups, agg)()
    results = streaming_agg.stream.sink_to_list()

    # Emit the frame in three uneven chunks.
    for chunk in (frame.iloc[:3], frame.iloc[3:7], frame.iloc[7:]):
        source.emit(chunk)

    pandas_groups = indexer(frame.groupby(grouper(frame)))
    expected = getattr(pandas_groups, agg)()
    assert assert_eq(results[-1], expected)