def test_aggregate_with_nat_size():
    """Check ``size`` under TimeGrouper when the key column contains NaT.

    GH 9925. The same data is grouped twice: once by a numeric key with a
    NaN in the third slot, and once by a daily ``TimeGrouper`` over a
    datetime key with ``pd.NaT`` in the same slot. The numeric result,
    padded with a zero-count row, must equal the TimeGrouper result.
    """
    n = 20
    data = np.random.randn(n, 4).astype('int64')
    normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    dt_df['key'] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2), pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5)
    ] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    normal_result = normal_grouped.size()
    dt_result = dt_grouped.size()

    # Add a zero-count row labelled 3 so that, once sorted, the expected
    # result has five rows to line up with the five daily labels below.
    pad = Series([0], index=[3])
    # pd.concat instead of Series.append: append was deprecated in
    # pandas 1.4 and removed in 2.0; concat gives the same result.
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    expected.index = date_range(start='2013-01-01',
                                freq='D',
                                periods=5,
                                name='key')
    assert_series_equal(expected, dt_result)
    assert dt_result.index.name == 'key'
def test_aggregate_with_nat(func, fill_value):
    """Check TimeGrouper aggregation against plain groupby with NaT keys.

    Parameters
    ----------
    func : str
        Name of the groupby method called on both groupings.
    fill_value : scalar
        Value expected in every column of the padded row (the slot whose
        key is NaN / NaT).
    """
    # check TimeGrouper's aggregation is identical as normal groupby
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet

    n = 20
    data = np.random.randn(n, 4).astype('int64')
    normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    dt_df['key'] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2), pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5)
    ] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    # Add a row labelled 3 filled with ``fill_value`` so that, once
    # sorted, the expected frame has five rows for the five daily labels.
    pad = DataFrame([[fill_value] * 4],
                    index=[3],
                    columns=['A', 'B', 'C', 'D'])
    # pd.concat instead of DataFrame.append: append was deprecated in
    # pandas 1.4 and removed in 2.0; concat gives the same result.
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    expected.index = date_range(start='2013-01-01',
                                freq='D',
                                periods=5,
                                name='key')
    assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == 'key'
Beispiel #3
0
 def __new__(cls, *args, **kwargs):
     """Warn that ``pd.TimeGrouper`` is deprecated, then build the real one.

     Emits a ``FutureWarning`` and returns
     ``pandas.core.resample.TimeGrouper(*args, **kwargs)``.
     """
     import warnings
     from pandas.core.resample import TimeGrouper

     message = ("pd.TimeGrouper is deprecated and will be removed; "
                "Please use pd.Grouper(freq=...)")
     # stacklevel=2 attributes the warning to the calling code
     warnings.warn(message, FutureWarning, stacklevel=2)
     return TimeGrouper(*args, **kwargs)
def test_repr():
    """``repr`` of a TimeGrouper spells out its settings (GH18203)."""
    grouper = TimeGrouper(key='A', freq='H')
    shown = repr(grouper)
    wanted = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
              "closed='left', label='left', how='mean', "
              "convention='e', base=0)")
    assert shown == wanted
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby.

    ``resample_method`` is the name of the groupby method invoked on both
    the integer-keyed and the datetime-keyed grouping.
    """
    # 'ohlc' is flagged as an expected failure before any data is built
    if resample_method == 'ohlc':
        pytest.xfail(reason='DataError: No numeric types to aggregate')

    columns = ['A', 'B', 'C', 'D']
    values = np.random.randn(20, 4)

    # Same values twice: keyed by the integers 1..5 ...
    int_keyed = DataFrame(values, columns=columns)
    int_keyed['key'] = [1, 2, 3, 4, 5] * 4

    # ... and keyed by the matching days 2013-01-01 .. 2013-01-05.
    date_keyed = DataFrame(values, columns=columns)
    date_keyed['key'] = [datetime(2013, 1, day) for day in range(1, 6)] * 4

    by_int = int_keyed.groupby('key')
    by_day = date_keyed.groupby(TimeGrouper(key='key', freq='D'))

    expected = getattr(by_int, resample_method)()
    dt_result = getattr(by_day, resample_method)()

    # Relabel the integer-keyed result with the daily index so the two
    # results can be compared directly.
    expected.index = date_range(start='2013-01-01',
                                freq='D',
                                periods=5,
                                name='key')
    tm.assert_equal(expected, dt_result)

    # if TimeGrouper is used included, 'nth' doesn't work yet
    """
Beispiel #6
0
def test_panel_aggregation():
    """``agg`` with a callable on a binned Panel groupby equals ``mean``."""
    dates = pd.date_range('1/1/2000', periods=100)
    values = np.random.randn(2, len(dates), 4)

    panel = Panel(values,
                  items=['Item1', 'Item2'],
                  major_axis=dates,
                  minor_axis=['A', 'B', 'C', 'D'])

    # only the middle element of the returned triple is needed
    _head, binner, _tail = TimeGrouper('M', axis=1)._get_grouper(panel)
    by_bin = panel.groupby(binner)
    expected = by_bin.mean()

    def mean_along_axis_1(chunk):
        # every group handed to the callable must itself be a Panel
        assert isinstance(chunk, Panel)
        return chunk.mean(1)

    aggregated = by_bin.agg(mean_along_axis_1)
    tm.assert_panel_equal(aggregated, expected)
Beispiel #7
0
def test_apply_iteration():
    """``apply`` on a TimeGrouper-derived grouping keeps the frame's index.

    Regression test for issue #2300.
    """
    periods = 1000
    dates = pd.date_range(start="2000-01-01", freq="D", periods=periods)
    frame = DataFrame({'open': 1, 'close': 2}, index=dates)

    _head, binner, _tail = TimeGrouper('M')._get_grouper(frame)

    # Errors
    by_bin = frame.groupby(binner, group_keys=False)

    def close_over_open(chunk):
        return chunk['close'] / chunk['open']

    # it works!
    applied = by_bin.apply(close_over_open)
    tm.assert_index_equal(applied.index, frame.index)
Beispiel #8
0
def test_resampler_is_iterable_all_ts(series):
    """Iterating a resampler yields the same pairs as the groupby.

    GH 15314. ``series`` is supplied by a fixture defined outside this
    block.
    """
    freq = 'H'
    by_grouper = series.groupby(TimeGrouper(freq, convention='start'))
    by_resample = series.resample(freq)

    # walk both iterations in lockstep and compare key and values
    for resample_item, groupby_item in zip(by_resample, by_grouper):
        resample_key, resample_values = resample_item
        groupby_key, groupby_values = groupby_item
        assert resample_key == groupby_key
        assert_series_equal(resample_values, groupby_values)
def test_panel_aggregation():
    """``agg`` with a callable on a binned Panel groupby equals ``mean``."""
    dates = pd.date_range('1/1/2000', periods=100)
    values = np.random.randn(2, len(dates), 4)

    panel = Panel(values,
                  items=['Item1', 'Item2'],
                  major_axis=dates,
                  minor_axis=['A', 'B', 'C', 'D'])

    # only the middle element of the returned triple is needed
    _head, binner, _tail = TimeGrouper('M', axis=1)._get_grouper(panel)
    by_bin = panel.groupby(binner)
    expected = by_bin.mean()

    def mean_along_axis_1(chunk):
        # every group handed to the callable must itself be a Panel
        assert isinstance(chunk, Panel)
        return chunk.mean(1)

    aggregated = by_bin.agg(mean_along_axis_1)
    tm.assert_panel_equal(aggregated, expected)
def test_fails_on_no_datetime_index(name, func):
    """Grouping by a TimeGrouper raises TypeError naming the index type.

    ``func`` is called with a length to build the index; ``name`` is the
    text expected inside the quotes of the error message.
    """
    size = 2
    index = func(size)
    frame = DataFrame({'a': np.random.randn(size)}, index=index)

    template = ("Only valid with DatetimeIndex, TimedeltaIndex "
                "or PeriodIndex, but got an instance of '{}'")
    expected_message = template.format(name)
    with pytest.raises(TypeError, match=expected_message):
        frame.groupby(TimeGrouper('D'))
def test_apply_iteration():
    """``apply`` on a TimeGrouper-derived grouping keeps the frame's index.

    Regression test for issue #2300.
    """
    periods = 1000
    dates = pd.date_range(start="2000-01-01", freq="D", periods=periods)
    frame = DataFrame({'open': 1, 'close': 2}, index=dates)

    _head, binner, _tail = TimeGrouper('M')._get_grouper(frame)

    # Errors
    by_bin = frame.groupby(binner, group_keys=False)

    def close_over_open(chunk):
        return chunk['close'] / chunk['open']

    # it works!
    applied = by_bin.apply(close_over_open)
    tm.assert_index_equal(applied.index, frame.index)
def test_custom_grouper(index):
    """Group by 5-minute TimeGrouper bins and check the aggregates.

    ``index`` comes from a fixture defined outside this block; the
    assertions require that right-closed, right-labelled 5-minute bins
    over it produce 2593 groups.
    """
    dti = index
    ones = Series(np.array([1] * len(dti)), index=dti, dtype='int64')

    cython_funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
    groupers = [
        TimeGrouper(Minute(5)),
        TimeGrouper(Minute(5), closed='right', label='right'),
    ]

    # check all cython functions work, first with the default binning and
    # then with right-closed / right-labelled bins; the names bound by the
    # last iteration are used by the assertions below
    for grouper in groupers:
        grouped = ones.groupby(grouper)
        for how in cython_funcs:
            grouped._cython_agg_general(how)

    assert grouped.ngroups == 2593
    assert notna(grouped.mean()).all()

    # construct expected val: every fifth label plus the final one
    expected_index = dti[0:-1:5].append(dti[-1:])
    expected = Series([1] + [5] * 2592, index=expected_index)

    # GH2763 - return input dtype if we can
    summed = grouped.agg(np.sum)
    assert_series_equal(summed, expected)

    frame = DataFrame(np.random.rand(len(dti), 10),
                      index=dti, dtype='float64')
    frame_summed = frame.groupby(grouper).agg(np.sum)

    assert len(frame_summed.columns) == 10
    assert len(frame_summed.index) == 2593
Beispiel #13
0
def test_fails_on_no_datetime_index():
    """Grouping by a TimeGrouper raises TypeError for each listed index.

    Each case pairs the name expected in the error message with the
    factory that builds an index of the requested length.
    """
    cases = (
        ('Int64Index', tm.makeIntIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    )
    size = 2
    for name, make_index in cases:
        index = make_index(size)
        frame = DataFrame({'a': np.random.randn(size)}, index=index)

        # %r puts the name in quotes inside the expected message
        msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
               "or PeriodIndex, but got an instance of %r" % name)
        with pytest.raises(TypeError, match=msg):
            frame.groupby(TimeGrouper('D'))
Beispiel #14
0
def test_aggregate_normal():
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    columns = ['A', 'B', 'C', 'D']
    values = np.random.randn(20, 4)

    # Same values twice: keyed by the integers 1..5 ...
    normal_df = DataFrame(values, columns=columns)
    normal_df['key'] = [1, 2, 3, 4, 5] * 4

    # ... and keyed by the matching days 2013-01-01 .. 2013-01-05.
    dt_df = DataFrame(values, columns=columns)
    dt_df['key'] = [datetime(2013, 1, day) for day in range(1, 6)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    # (method name, comparison helper) pairs in the order they are
    # checked. 'size' is compared as a Series, everything else as a
    # DataFrame; 'size', 'first' and 'last' relate to GH 7453.
    frame_methods = ['min', 'max', 'prod', 'var', 'std', 'mean',
                     'count', 'sum']
    checks = [(method, assert_frame_equal) for method in frame_methods]
    checks.append(('size', assert_series_equal))
    checks.extend([('first', assert_frame_equal),
                   ('last', assert_frame_equal)])

    for method, compare in checks:
        expected = getattr(normal_grouped, method)()
        dt_result = getattr(dt_grouped, method)()
        # relabel with the daily index so both results line up
        expected.index = date_range(start='2013-01-01', freq='D',
                                    periods=5, name='key')
        compare(expected, dt_result)

    # if TimeGrouper is used included, 'nth' doesn't work yet

    """
def test_resample_frame_basic():
    """Basic DataFrame resampling checks on ``tm.makeTimeDataFrame()``."""
    frame = tm.makeTimeDataFrame()
    monthly = frame.groupby(TimeGrouper('M'))

    # check all cython functions work
    for how in ('add', 'mean', 'prod', 'min', 'max', 'var'):
        monthly._cython_agg_general(how)

    # column 'A' of the resampled frame must match resampling 'A' alone
    for rule in ('A', 'M'):
        frame_mean = frame.resample(rule).mean()
        column_mean = frame['A'].resample(rule).mean()
        assert_series_equal(frame_mean['A'], column_mean)

    # results are discarded: these calls only need to run without error
    for rule in ('M', 'W-WED'):
        frame.resample(rule, kind='period').mean()
def test_aaa_group_order():
    """Check that each daily TimeGrouper group keeps its rows in order.

    GH 12840: check TimeGrouper perform stable sorts.
    """
    values = np.random.randn(20, 4)
    frame = DataFrame(values, columns=['A', 'B', 'C', 'D'])
    frame['key'] = [datetime(2013, 1, day) for day in range(1, 6)] * 4
    grouped = frame.groupby(TimeGrouper(key='key', freq='D'))

    # The keys cycle through the five days, so the group for day k must
    # be every fifth row of the frame starting at offset k - 1.
    for offset in range(5):
        day = datetime(2013, 1, offset + 1)
        tm.assert_frame_equal(grouped.get_group(day), frame[offset::5])
def test_resample_ohlc(series):
    """Check ``resample('5Min').ohlc()`` against slices of the series.

    ``series`` comes from a fixture defined outside this block.
    """
    s = series

    # last value per 5-minute group; used only for its length
    last_per_bin = s.groupby(TimeGrouper(Minute(5))).agg(lambda x: x[-1])
    ohlc = s.resample('5Min').ohlc()

    assert len(ohlc) == len(last_per_bin)
    assert len(ohlc.columns) == 4

    # second-to-last row is compared with s[-6:-1]
    row = ohlc.iloc[-2]
    window = s[-6:-1]
    assert row['open'] == s[-6]
    assert row['high'] == window.max()
    assert row['low'] == window.min()
    assert row['close'] == s[-2]

    # first row is compared with s[:5]
    row = ohlc.iloc[0]
    window = s[:5]
    assert row['open'] == s[0]
    assert row['high'] == window.max()
    assert row['low'] == window.min()
    assert row['close'] == s[4]
def test_resample_basic_grouper(series):
    """``resample('5Min').last()`` matches a TimeGrouper groupby.

    The groupby uses left-closed, left-labelled 5-minute bins and takes
    the last element of each group. ``series`` comes from a fixture
    defined outside this block.
    """
    s = series
    five_min = TimeGrouper(Minute(5), closed='left', label='left')
    expected = s.groupby(five_min).agg(lambda x: x[-1])
    result = s.resample('5Min').last()
    assert_series_equal(result, expected)