예제 #1
0
    def test_timegrouper_with_reg_groups_freq(self, freq):
        # GH 6764 multiple grouping with/without sort
        df = DataFrame(
            {
                "date": pd.to_datetime(
                    [
                        "20121002",
                        "20121007",
                        "20130130",
                        "20130202",
                        "20130305",
                        "20121002",
                        "20121207",
                        "20130130",
                        "20130202",
                        "20130305",
                        "20130202",
                        "20130305",
                    ]
                ),
                "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
                "whole_cost": [
                    1790,
                    364,
                    280,
                    259,
                    201,
                    623,
                    90,
                    312,
                    359,
                    301,
                    359,
                    801,
                ],
                "cost1": [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12],
            }
        ).set_index("date")

        expected = (
            df.groupby("user_id")["whole_cost"]
            .resample(freq)
            .sum(min_count=1)  # XXX
            .dropna()
            .reorder_levels(["date", "user_id"])
            .sort_index()
            .astype("int64")
        )
        expected.name = "whole_cost"

        result1 = (
            df.sort_index().groupby([Grouper(freq=freq), "user_id"])["whole_cost"].sum()
        )
        tm.assert_series_equal(result1, expected)

        result2 = df.groupby([Grouper(freq=freq), "user_id"])["whole_cost"].sum()
        tm.assert_series_equal(result2, expected)
예제 #2
0
def test_repr():
    # GH18203
    result = repr(Grouper(key="A", freq="H"))
    expected = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
                "closed='left', label='left', how='mean', "
                "convention='e', origin='start_day')")
    assert result == expected

    result = repr(Grouper(key="A", freq="H", origin="2000-01-01"))
    expected = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
                "closed='left', label='left', how='mean', "
                "convention='e', origin=Timestamp('2000-01-01 00:00:00'))")
    assert result == expected
예제 #3
0
def test_aggregate_with_nat_size():
    # GH 9925
    n = 20
    data = np.random.randn(n, 4).astype('int64')
    normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    dt_df['key'] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2), pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5)
    ] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(Grouper(key='key', freq='D'))

    normal_result = normal_grouped.size()
    dt_result = dt_grouped.size()

    pad = Series([0], index=[3])
    expected = normal_result.append(pad)
    expected = expected.sort_index()
    expected.index = date_range(start='2013-01-01',
                                freq='D',
                                periods=5,
                                name='key')
    assert_series_equal(expected, dt_result)
    assert dt_result.index.name == 'key'
예제 #4
0
def test_aggregate_with_nat(func, fill_value):
    # check TimeGrouper's aggregation is identical as normal groupby
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet

    n = 20
    data = np.random.randn(n, 4).astype('int64')
    normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    dt_df['key'] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2), pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5)
    ] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(Grouper(key='key', freq='D'))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    pad = DataFrame([[fill_value] * 4],
                    index=[3],
                    columns=['A', 'B', 'C', 'D'])
    expected = normal_result.append(pad)
    expected = expected.sort_index()
    expected.index = date_range(start='2013-01-01',
                                freq='D',
                                periods=5,
                                name='key')
    assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == 'key'
예제 #5
0
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby."""

    if resample_method == 'ohlc':
        pytest.xfail(reason='DataError: No numeric types to aggregate')

    data = np.random.randn(20, 4)
    normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    normal_df['key'] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    dt_df['key'] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5)
    ] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(Grouper(key='key', freq='D'))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    expected.index = date_range(start='2013-01-01',
                                freq='D',
                                periods=5,
                                name='key')
    tm.assert_equal(expected, dt_result)

    # if TimeGrouper is used included, 'nth' doesn't work yet
    """
예제 #6
0
def test_aggregate_nth(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby."""

    data = np.random.randn(20, 4)
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = normal_grouped.nth(3)
    expected.index = date_range(start="2013-01-01",
                                freq="D",
                                periods=5,
                                name="key")
    dt_result = dt_grouped.nth(3)
    tm.assert_frame_equal(expected, dt_result)
예제 #7
0
def test_repr():
    # GH18203
    result = repr(Grouper(key="A", freq="H"))
    expected = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
                "closed='left', label='left', how='mean', "
                "convention='e', base=0)")
    assert result == expected
예제 #8
0
    def test_timegrouper_apply_return_type_value(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]})
        df_dt = df.copy()
        df_dt["date"] = pd.to_datetime(df_dt["date"])

        def sumfunc_value(x):
            return x.value.sum()

        expected = df.groupby(Grouper(key="date")).apply(sumfunc_value)
        result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value)
        tm.assert_series_equal(
            result.reset_index(drop=True), expected.reset_index(drop=True)
        )
예제 #9
0
    def test_groupby_with_timegrouper_methods(self, should_sort):
        # GH 3881
        # make sure API of timegrouper conforms

        df = DataFrame(
            {
                "Branch": "A A A A A B".split(),
                "Buyer": "Carl Mark Carl Joe Joe Carl".split(),
                "Quantity": [1, 3, 5, 8, 9, 3],
                "Date": [
                    datetime(2013, 1, 1, 13, 0),
                    datetime(2013, 1, 1, 13, 5),
                    datetime(2013, 10, 1, 20, 0),
                    datetime(2013, 10, 2, 10, 0),
                    datetime(2013, 12, 2, 12, 0),
                    datetime(2013, 12, 2, 14, 0),
                ],
            }
        )

        if should_sort:
            df = df.sort_values(by="Quantity", ascending=False)

        df = df.set_index("Date", drop=False)
        g = df.groupby(Grouper(freq="6M"))
        assert g.group_keys

        assert isinstance(g.grouper, BinGrouper)
        groups = g.groups
        assert isinstance(groups, dict)
        assert len(groups) == 3
예제 #10
0
def test_aggregate_with_nat_size():
    # GH 9925
    n = 20
    data = np.random.randn(n, 4).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = normal_grouped.size()
    dt_result = dt_grouped.size()

    pad = Series([0], index=[3])
    expected = normal_result.append(pad)
    expected = expected.sort_index()
    expected.index = date_range(
        start="2013-01-01", freq="D", periods=5, name="key"
    )._with_freq(None)
    tm.assert_series_equal(expected, dt_result)
    assert dt_result.index.name == "key"
예제 #11
0
def test_aggregate_with_nat(func, fill_value):
    # check TimeGrouper's aggregation is identical as normal groupby
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet

    n = 20
    data = np.random.randn(n, 4).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
    expected = normal_result.append(pad)
    expected = expected.sort_index()
    dti = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    expected.index = dti._with_freq(None)  # TODO: is this desired?
    tm.assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
예제 #12
0
    def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
        self, frame_for_truncated_bingrouper
    ):
        df = frame_for_truncated_bingrouper

        # We need to create a GroupBy object with only one non-NaT group,
        #  so use a huge freq so that all non-NaT dates will be grouped together
        tdg = Grouper(key="Date", freq="100Y")

        with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"):
            gb = df.groupby(tdg, squeeze=True)

        # check that we will go through the singular_series path
        #  in _wrap_applied_output_series
        assert gb.ngroups == 1
        assert gb._selected_obj._get_axis(gb.axis).nlevels == 1

        # function that returns a Series
        res = gb.apply(lambda x: x["Quantity"] * 2)

        key = Timestamp("2013-12-31")
        ordering = df["Date"].sort_values().dropna().index
        mi = MultiIndex.from_product([[key], ordering], names=["Date", None])

        ex_values = df["Quantity"].take(ordering).values * 2
        expected = Series(ex_values, index=mi, name="Quantity")
        tm.assert_series_equal(res, expected)
예제 #13
0
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    if resample_method == "ohlc":
        pytest.xfail(reason="DataError: No numeric types to aggregate")

    data = np.random.randn(20, 4)
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    tm.assert_equal(expected, dt_result)

    # if TimeGrouper is used included, 'nth' doesn't work yet

    """
예제 #14
0
    def test_groupby_with_timegrouper(self):
        # GH 4161
        # TimeGrouper requires a sorted index
        # also verifies that the resultant index has the correct name
        df_original = DataFrame({
            "Buyer":
            "Carl Carl Carl Carl Joe Carl".split(),
            "Quantity": [18, 3, 5, 1, 9, 3],
            "Date": [
                datetime(2013, 9, 1, 13, 0),
                datetime(2013, 9, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 3, 10, 0),
                datetime(2013, 12, 2, 12, 0),
                datetime(2013, 9, 2, 14, 0),
            ],
        })

        # GH 6908 change target column's order
        df_reordered = df_original.sort_values(by="Quantity")

        for df in [df_original, df_reordered]:
            df = df.set_index(["Date"])

            expected = DataFrame(
                {"Quantity": 0},
                index=date_range("20130901",
                                 "20131205",
                                 freq="5D",
                                 name="Date",
                                 inclusive="left"),
            )
            expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64")

            msg = "The default value of numeric_only"
            with tm.assert_produces_warning(FutureWarning, match=msg):
                result1 = df.resample("5D").sum()
            tm.assert_frame_equal(result1, expected)

            df_sorted = df.sort_index()
            with tm.assert_produces_warning(FutureWarning, match=msg):
                result2 = df_sorted.groupby(Grouper(freq="5D")).sum()
            tm.assert_frame_equal(result2, expected)

            with tm.assert_produces_warning(FutureWarning, match=msg):
                result3 = df.groupby(Grouper(freq="5D")).sum()
            tm.assert_frame_equal(result3, expected)
예제 #15
0
def test_apply_iteration():
    # #2300
    N = 1000
    ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
    df = DataFrame({'open': 1, 'close': 2}, index=ind)
    tg = Grouper(freq='M')

    _, grouper, _ = tg._get_grouper(df)

    # Errors
    grouped = df.groupby(grouper, group_keys=False)

    def f(df):
        return df['close'] / df['open']

    # it works!
    result = grouped.apply(f)
    tm.assert_index_equal(result.index, df.index)
예제 #16
0
def test_fails_on_no_datetime_index(name, func):
    n = 2
    index = func(n)
    df = DataFrame({"a": np.random.randn(n)}, index=index)

    msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
           f"or PeriodIndex, but got an instance of '{name}'")
    with pytest.raises(TypeError, match=msg):
        df.groupby(Grouper(freq="D"))
예제 #17
0
def test_apply_iteration():
    # #2300
    N = 1000
    ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
    df = DataFrame({"open": 1, "close": 2}, index=ind)
    tg = Grouper(freq="M")

    _, grouper, _ = tg._get_grouper(df)

    # Errors
    grouped = df.groupby(grouper, group_keys=False)

    def f(df):
        return df["close"] / df["open"]

    # it works!
    result = grouped.apply(f)
    tm.assert_index_equal(result.index, df.index)
예제 #18
0
def test_resampler_is_iterable(series):
    # GH 15314
    freq = "H"
    tg = Grouper(freq=freq, convention="start")
    grouped = series.groupby(tg)
    resampled = series.resample(freq)
    for (rk, rv), (gk, gv) in zip(resampled, grouped):
        assert rk == gk
        tm.assert_series_equal(rv, gv)
예제 #19
0
def test_count():
    test_series[::3] = np.nan

    expected = test_series.groupby(lambda x: x.year).count()

    grouper = Grouper(freq="A", label="right", closed="right")
    result = test_series.groupby(grouper).count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)

    result = test_series.resample("A").count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)
예제 #20
0
def test_count():
    test_series[::3] = np.nan

    expected = test_series.groupby(lambda x: x.year).count()

    grouper = Grouper(freq='A', label='right', closed='right')
    result = test_series.groupby(grouper).count()
    expected.index = result.index
    assert_series_equal(result, expected)

    result = test_series.resample('A').count()
    expected.index = result.index
    assert_series_equal(result, expected)
예제 #21
0
def test_custom_grouper(index):

    dti = index
    s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')

    b = Grouper(freq=Minute(5))
    g = s.groupby(b)

    # check all cython functions work
    funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
    for f in funcs:
        g._cython_agg_general(f)

    b = Grouper(freq=Minute(5), closed='right', label='right')
    g = s.groupby(b)
    # check all cython functions work
    funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
    for f in funcs:
        g._cython_agg_general(f)

    assert g.ngroups == 2593
    assert notna(g.mean()).all()

    # construct expected val
    arr = [1] + [5] * 2592
    idx = dti[0:-1:5]
    idx = idx.append(dti[-1:])
    expect = Series(arr, index=idx)

    # GH2763 - return in put dtype if we can
    result = g.agg(np.sum)
    assert_series_equal(result, expect)

    df = DataFrame(np.random.rand(len(dti), 10),
                   index=dti, dtype='float64')
    r = df.groupby(b).agg(np.sum)

    assert len(r.columns) == 10
    assert len(r.index) == 2593
예제 #22
0
def test_apply():
    grouper = Grouper(freq="A", label="right", closed="right")

    grouped = test_series.groupby(grouper)

    def f(x):
        return x.sort_values()[-3:]

    applied = grouped.apply(f)
    expected = test_series.groupby(lambda x: x.year).apply(f)

    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
예제 #23
0
    def test_nunique_with_timegrouper_and_nat(self):
        # GH 17575
        test = DataFrame({
            "time": [
                Timestamp("2016-06-28 09:35:35"),
                pd.NaT,
                Timestamp("2016-06-28 16:46:28"),
            ],
            "data": ["1", "2", "3"],
        })

        grouper = Grouper(key="time", freq="h")
        result = test.groupby(grouper)["data"].nunique()
        expected = test[test.time.notnull()].groupby(grouper)["data"].nunique()
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)
예제 #24
0
    def test_timegrouper_apply_return_type_value(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_value(x):
            return x.value.sum()

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
        result = (df_dt.groupby(Grouper(freq='M', key='date'))
                  .apply(sumfunc_value))
        assert_series_equal(result.reset_index(drop=True),
                            expected.reset_index(drop=True))
예제 #25
0
def test_resample_frame_basic():
    df = tm.makeTimeDataFrame()

    b = Grouper(freq="M")
    g = df.groupby(b)

    # check all cython functions work
    funcs = ["add", "mean", "prod", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    result = df.resample("A").mean()
    tm.assert_series_equal(result["A"], df["A"].resample("A").mean())

    result = df.resample("M").mean()
    tm.assert_series_equal(result["A"], df["A"].resample("M").mean())

    df.resample("M", kind="period").mean()
    df.resample("W-WED", kind="period").mean()
예제 #26
0
    def test_scalar_call_versus_list_call(self):
        # Issue: 17530
        data_frame = {
            "location": ["shanghai", "beijing", "shanghai"],
            "time": Series(
                ["2017-08-09 13:32:23", "2017-08-11 23:23:15", "2017-08-11 22:23:15"],
                dtype="datetime64[ns]",
            ),
            "value": [1, 2, 3],
        }
        data_frame = DataFrame(data_frame).set_index("time")
        grouper = Grouper(freq="D")

        grouped = data_frame.groupby(grouper)
        result = grouped.count()
        grouped = data_frame.groupby([grouper])
        expected = grouped.count()

        tm.assert_frame_equal(result, expected)
예제 #27
0
def test_resample_frame_basic():
    df = tm.makeTimeDataFrame()

    b = Grouper(freq='M')
    g = df.groupby(b)

    # check all cython functions work
    funcs = ['add', 'mean', 'prod', 'min', 'max', 'var']
    for f in funcs:
        g._cython_agg_general(f)

    result = df.resample('A').mean()
    assert_series_equal(result['A'], df['A'].resample('A').mean())

    result = df.resample('M').mean()
    assert_series_equal(result['A'], df['A'].resample('M').mean())

    df.resample('M', kind='period').mean()
    df.resample('W-WED', kind='period').mean()
예제 #28
0
def test_aaa_group_order():
    # GH 12840
    # check TimeGrouper perform stable sorts
    n = 20
    data = np.random.randn(n, 4)
    df = DataFrame(data, columns=["A", "B", "C", "D"])
    df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5])
예제 #29
0
def test_aaa_group_order():
    # GH 12840
    # check TimeGrouper perform stable sorts
    n = 20
    data = np.random.randn(n, 4)
    df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
    df['key'] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5)
    ] * 4
    grouped = df.groupby(Grouper(key='key', freq='D'))

    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5])
    tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5])
예제 #30
0
def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):
    """
    GroupBy object such that gb.grouper is a BinGrouper and
    len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq)

    Aggregations on this groupby should have

        dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date")

    As either the index or an index level.
    """
    df = frame_for_truncated_bingrouper

    tdg = Grouper(key="Date", freq="5D")
    gb = df.groupby(tdg)

    # check we're testing the case we're interested in
    assert len(gb.grouper.result_index) != len(gb.grouper.group_keys_seq)

    return gb
예제 #31
0
def test_resample_ohlc(series):
    s = series

    grouper = Grouper(freq=Minute(5))
    expect = s.groupby(grouper).agg(lambda x: x[-1])
    result = s.resample("5Min").ohlc()

    assert len(result) == len(expect)
    assert len(result.columns) == 4

    xs = result.iloc[-2]
    assert xs["open"] == s[-6]
    assert xs["high"] == s[-6:-1].max()
    assert xs["low"] == s[-6:-1].min()
    assert xs["close"] == s[-2]

    xs = result.iloc[0]
    assert xs["open"] == s[0]
    assert xs["high"] == s[:5].max()
    assert xs["low"] == s[:5].min()
    assert xs["close"] == s[4]