def test_ts_plot_format_coord(self):
        """format_coord on a period-indexed plot renders 't = <period>  y = <value>'.

        Checked through both Series.plot and the lower-level tsplot, including
        the case where adding a daily line changes an annual plot's freq field.
        """
        def check_format_of_first_point(ax, expected_string):
            # Probe the first point of the first line through the axes'
            # coordinate formatter.
            first_line = ax.get_lines()[0]
            first_x = first_line.get_xdata()[0].ordinal
            first_y = first_line.get_ydata()[0]
            try:
                assert expected_string == ax.format_coord(first_x, first_y)
            # FIX: `except (ValueError):` used misleading tuple-like parens
            except ValueError:
                pytest.skip("skipping test because issue forming "
                            "test comparison GH7664")

        annual = Series(1, index=date_range('2014-01-01', periods=3,
                                            freq='A-DEC'))
        _, ax = self.plt.subplots()
        annual.plot(ax=ax)
        check_format_of_first_point(ax, 't = 2014  y = 1.000000')

        # note this is added to the annual plot already in existence, and
        # changes its freq field
        daily = Series(1, index=date_range('2014-01-01', periods=3, freq='D'))
        daily.plot(ax=ax)
        check_format_of_first_point(ax,
                                    't = 2014-01-01  y = 1.000000')
        tm.close()

        # tsplot
        _, ax = self.plt.subplots()
        from pandas.tseries.plotting import tsplot
        tsplot(annual, self.plt.Axes.plot, ax=ax)
        check_format_of_first_point(ax, 't = 2014  y = 1.000000')
        tsplot(daily, self.plt.Axes.plot, ax=ax)
        check_format_of_first_point(ax, 't = 2014-01-01  y = 1.000000')
    def test_evenly_divisible_with_no_extra_bins(self):
        # 4076
        # when the frequency is evenly divisible, sometimes extra bins

        df = DataFrame(np.random.randn(9, 3),
                       index=date_range('2000-1-1', periods=9))
        result = df.resample('5D').mean()
        expected = pd.concat(
            [df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
        expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
        assert_frame_equal(result, expected)

        index = date_range(start='2001-5-4', periods=28)
        df = DataFrame(
            [{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
              'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 +
            [{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
              'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28,
            index=index.append(index)).sort_index()

        index = date_range('2001-5-4', periods=4, freq='7D')
        expected = DataFrame(
            [{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
              'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4,
            index=index)
        result = df.resample('7D').count()
        assert_frame_equal(result, expected)

        expected = DataFrame(
            [{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
              'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4,
            index=index)
        result = df.resample('7D').sum()
        assert_frame_equal(result, expected)
Exemple #3
0
    def test_frame_align_aware(self):
        """align() keeps a shared timezone and converts mixed timezones to
        UTC, for frame/frame, frame/Series and Series/frame pairs."""
        idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern')
        idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern')
        df1 = DataFrame(np.random.randn(len(idx1), 3), idx1)
        df2 = DataFrame(np.random.randn(len(idx2), 3), idx2)
        new1, new2 = df1.align(df2)
        assert df1.index.tz == new1.index.tz
        assert df2.index.tz == new2.index.tz

        # different timezones convert to UTC

        # frame with frame
        df1_central = df1.tz_convert('US/Central')
        new1, new2 = df1.align(df1_central)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

        # frame with Series
        new1, new2 = df1.align(df1_central[0], axis=0)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

        # Series with frame
        # BUG FIX: the original discarded this result and re-asserted the
        # previous pair, leaving the Series-with-frame case untested
        new1, new2 = df1[0].align(df1_central, axis=0)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC
    def test_from_weekly_resampling(self):
        """Plotting a monthly series and then a weekly one on the same axes
        must upsample every line to the weekly frequency."""
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)

        expected_h = idxh.to_period().asi8.astype(np.float64)
        expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544,
                               1549, 1553, 1558, 1562], dtype=np.float64)

        def _check_lines(lines):
            # every line must carry the weekly freq; 12-point lines come
            # from the monthly (low) series
            for line in lines:
                assert PeriodIndex(data=line.get_xdata()).freq == idxh.freq
                xdata = line.get_xdata(orig=False)
                expected = expected_l if len(xdata) == 12 else expected_h
                tm.assert_numpy_array_equal(xdata, expected)

        _, ax = self.plt.subplots()
        low.plot(ax=ax)
        high.plot(ax=ax)
        _check_lines(ax.get_lines())
        tm.close()

        # tsplot
        from pandas.tseries.plotting import tsplot

        _, ax = self.plt.subplots()
        tsplot(low, self.plt.Axes.plot, ax=ax)
        lines = tsplot(high, self.plt.Axes.plot, ax=ax)
        _check_lines(lines)
Exemple #5
0
def test_resample_group_info(n, k):
    """resample().nunique() must match a hand-rolled per-bin count of
    distinct values (GH 10914)."""
    # fixed seed so the draw of uniques is reproducible
    prng = np.random.RandomState(1234)

    dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
    ts = Series(prng.randint(0, n // k, n).astype('int64'),
                index=prng.choice(dr, n))

    left = ts.resample('30T').nunique()
    ix = date_range(start=ts.index.min(), end=ts.index.max(), freq='30T')

    # sort the (bin, value) pairs, then keep the first occurrence of each
    # distinct value inside each bin
    vals = ts.values
    bins = np.searchsorted(ix.values, ts.index, side='right')
    order = np.lexsort((vals, bins))
    vals, bins = vals[order], bins[order]

    new_value = np.r_[True, vals[1:] != vals[:-1]]
    new_bin = np.r_[True, bins[1:] != bins[:-1]]
    firsts = new_value | new_bin

    counts = np.bincount(bins[firsts] - 1,
                         minlength=len(ix)).astype('int64', copy=False)
    right = Series(counts, index=ix)

    assert_series_equal(left, right)
Exemple #6
0
def test_resample_upsampling_picked_but_not_correct():
    """Resampling at the index's own frequency must keep the original
    start point and must not be treated as an upsample (GH 3020, GH 5955)."""
    dates = date_range('01-Jan-2014', '05-Jan-2014', freq='D')
    series = Series(1, index=dates)

    resampled = series.resample('D').mean()
    assert resampled.index[0] == dates[0]

    # GH 5955: noon-anchored daily points — the axis frequency matches the
    # resample frequency, so this must not be decided as an upsample
    noon_points = [datetime(1975, 1, day, 12, 0) for day in range(1, 6)]
    s = Series(np.arange(1., 6), index=noon_points)
    expected = Series(np.arange(1., 6),
                      index=date_range('19750101', periods=5, freq='D'))

    assert_series_equal(s.resample('D').count(),
                        Series(1, index=expected.index))
    assert_series_equal(s.resample('D').sum(), expected)
    assert_series_equal(s.resample('D').mean(), expected)
Exemple #7
0
def test_resample_base():
    """5-minute bins shifted by 2 minutes start at 23:57 of the prior day.

    ``resample(base=2)`` was deprecated (pandas 1.1) and later removed; the
    documented, behavior-identical replacement is ``offset='2min'``, which
    shifts the bin edges by the same amount.
    """
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 02:00', freq='s')
    ts = Series(np.random.randn(len(rng)), index=rng)

    resampled = ts.resample('5min', offset='2min').mean()
    # edges fall on minutes congruent to 2 mod 5, so the first (left) label
    # covering 00:00:00 is 23:57 of the previous day
    exp_rng = date_range('12/31/1999 23:57:00', '1/1/2000 01:57',
                         freq='5min')
    tm.assert_index_equal(resampled.index, exp_rng)
Exemple #8
0
 def test_mixed_freq_hf_first(self):
     """When the high-frequency (daily) series is plotted first, every
     later line must share its daily frequency."""
     idxh = date_range('1/1/1999', periods=365, freq='D')
     idxl = date_range('1/1/1999', periods=12, freq='M')
     daily = Series(np.random.randn(len(idxh)), idxh)
     monthly = Series(np.random.randn(len(idxl)), idxl)
     daily.plot()
     ax = monthly.plot()
     for line in ax.get_lines():
         assert PeriodIndex(data=line.get_xdata()).freq == 'D'
Exemple #9
0
def test_resample_rounding():
    """Per-bin sums must be exact when bin edges need sub-second rounding
    (GH 8371)."""
    data = """date,time,value
11-08-2014,00:00:01.093,1
11-08-2014,00:00:02.159,1
11-08-2014,00:00:02.667,1
11-08-2014,00:00:03.175,1
11-08-2014,00:00:07.058,1
11-08-2014,00:00:07.362,1
11-08-2014,00:00:08.324,1
11-08-2014,00:00:08.830,1
11-08-2014,00:00:08.982,1
11-08-2014,00:00:09.815,1
11-08-2014,00:00:10.540,1
11-08-2014,00:00:11.061,1
11-08-2014,00:00:11.617,1
11-08-2014,00:00:13.607,1
11-08-2014,00:00:14.535,1
11-08-2014,00:00:15.525,1
11-08-2014,00:00:17.960,1
11-08-2014,00:00:20.674,1
11-08-2014,00:00:21.191,1"""

    df = pd.read_csv(StringIO(data),
                     parse_dates={'timestamp': ['date', 'time']},
                     index_col='timestamp')
    df.index.name = None

    # frequency -> expected per-bin sums
    cases = [
        ('6s', [4, 9, 4, 2]),
        ('7s', [4, 10, 4, 1]),
        ('11s', [11, 8]),
        ('13s', [13, 6]),
        ('17s', [16, 3]),
    ]
    for freq, sums in cases:
        expected = DataFrame({'value': sums},
                             index=date_range('2014-11-08', freq=freq,
                                              periods=len(sums)))
        assert_frame_equal(df.resample(freq).sum(), expected)
Exemple #10
0
def test_downsample_across_dst():
    """Upsampling 2-hourly Berlin data to hourly across the DST fall-back
    must fill the intermediate hours with NaN (GH 8531)."""
    tz = pytz.timezone('Europe/Berlin')
    start = tz.localize(datetime(2014, 10, 26))
    dates = date_range(start, periods=4, freq='2H')

    result = Series(5, index=dates).resample('H').mean()

    # every other hourly slot is absent from the source -> NaN
    expected_index = date_range(start, periods=7, freq='H')
    expected = Series([5., np.nan] * 3 + [5.], index=expected_index)
    tm.assert_series_equal(result, expected)
Exemple #11
0
def test_resample_extra_index_point():
    """Business-month-end resampling must not emit an extra bin past the
    data (GH 9756)."""
    bm_index = date_range(start='20150101', end='20150331', freq='BM')
    expected = DataFrame({'A': Series([21, 41, 63], index=bm_index)})

    bday_index = date_range(start='20150101', end='20150331', freq='B')
    df = DataFrame({'A': Series(range(len(bday_index)), index=bday_index)},
                   dtype='int64')
    assert_frame_equal(df.resample('BM').last(), expected)
 def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz):
     """tz_localize/tz_convert must never mutate the original Series,
     regardless of the ``copy`` flag (GH 6326)."""
     def make_series():
         return Series(np.arange(0, 5),
                       index=date_range('20131027', periods=5, freq='1H',
                                        tz=tz))

     result = make_series()
     # the return value is discarded on purpose: only the in-place
     # (non-)mutation of ``result`` is under test
     getattr(result, method)('UTC', copy=copy)
     tm.assert_series_equal(result, make_series())
    def test_resample_tz_localized(self):
        """Resampling tz-aware series: weekly/daily means respect the local
        timezone, and offset-based annual aggregation works on tz-aware
        frames (GH 2245, GH 6397).

        FIX: ``lrange`` (removed from pandas.compat) is replaced by the
        equivalent ``list(range(...))``.
        """
        dr = date_range(start='2012-4-13', end='2012-5-1')
        ts = Series(list(range(len(dr))), dr)

        ts_utc = ts.tz_localize('UTC')
        ts_local = ts_utc.tz_convert('America/Los_Angeles')

        result = ts_local.resample('W').mean()

        # the aware result must match the naive computation re-localized
        ts_local_naive = ts_local.copy()
        ts_local_naive.index = [x.replace(tzinfo=None)
                                for x in ts_local_naive.index.to_pydatetime()]

        exp = ts_local_naive.resample(
            'W').mean().tz_localize('America/Los_Angeles')

        assert_series_equal(result, exp)

        # it works
        result = ts_local.resample('D').mean()

        # #2245
        idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                         tz='Australia/Sydney')
        s = Series([1, 2], index=idx)

        result = s.resample('D', closed='right', label='right').mean()
        ex_index = date_range('2001-09-21', periods=1, freq='D',
                              tz='Australia/Sydney')
        expected = Series([1.5], index=ex_index)

        assert_series_equal(result, expected)

        # for good measure
        result = s.resample('D', kind='period').mean()
        ex_index = period_range('2001-09-20', periods=1, freq='D')
        expected = Series([1.5], index=ex_index)
        assert_series_equal(result, expected)

        # GH 6397
        # comparing an offset that doesn't propagate tz's
        rng = date_range('1/1/2011', periods=20000, freq='H')
        rng = rng.tz_localize('EST')
        ts = DataFrame(index=rng)
        ts['first'] = np.random.randn(len(rng))
        ts['second'] = np.cumsum(np.random.randn(len(rng)))
        expected = DataFrame(
            {
                'first': ts.resample('A').sum()['first'],
                'second': ts.resample('A').mean()['second']},
            columns=['first', 'second'])
        result = ts.resample(
            'A').agg({'first': np.sum,
                      'second': np.mean}).reindex(columns=['first', 'second'])
        assert_frame_equal(result, expected)
Exemple #14
0
    def test_series_tz_convert(self):
        rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern')
        ts = Series(1, index=rng)

        result = ts.tz_convert('Europe/Berlin')
        assert result.index.tz.zone == 'Europe/Berlin'

        # can't convert tz-naive
        rng = date_range('1/1/2011', periods=200, freq='D')
        ts = Series(1, index=rng)
        tm.assert_raises_regex(TypeError, "Cannot convert tz-naive",
                               ts.tz_convert, 'US/Eastern')
Exemple #15
0
def test_resample_loffset_upsample():
    """ffill-resample with the label offset applied afterwards (GH 20744).

    FIX: ``loffset`` was deprecated and removed from ``resample``; the
    documented, behavior-identical replacement is shifting the result index
    after resampling.  Positional lookups use ``iloc`` because integer
    ``[]`` indexing on a datetime index is ambiguous (and deprecated).
    """
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
    s = Series(np.random.randn(14), index=rng)

    result = s.resample('5min', closed='right', label='right').ffill()
    result.index = result.index + timedelta(minutes=1)

    idx = date_range('1/1/2000', periods=4, freq='5min')
    expected = Series([s.iloc[0], s.iloc[5], s.iloc[10], s.iloc[-1]],
                      index=idx + timedelta(minutes=1))

    assert_series_equal(result, expected)
Exemple #16
0
    def test_series_tz_localize(self):
        """tz_localize attaches the timezone; localizing an already
        tz-aware index raises TypeError.

        FIX: ``tm.assert_raises_regex`` was removed from pandas; use
        ``pytest.raises(..., match=...)``.  ``str(tz)`` replaces the
        pytz-only ``.tz.zone`` so the check works with any tz backend.
        """
        rng = date_range('1/1/2011', periods=100, freq='H')
        ts = Series(1, index=rng)

        result = ts.tz_localize('utc')
        assert str(result.index.tz) == 'UTC'

        # Can't localize if already tz-aware
        rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
        ts = Series(1, index=rng)
        with pytest.raises(TypeError, match='Already tz-aware'):
            ts.tz_localize('US/Eastern')
Exemple #17
0
 def test_secondary_upsample(self):
     """Upsampled secondary-y plot: the returned axis is the left twin and
     all lines on both axes use the daily frequency."""
     idxh = date_range('1/1/1999', periods=365, freq='D')
     idxl = date_range('1/1/1999', periods=12, freq='M')
     daily = Series(np.random.randn(len(idxh)), idxh)
     monthly = Series(np.random.randn(len(idxl)), idxl)
     monthly.plot()
     ax = daily.plot(secondary_y=True)
     assert hasattr(ax, 'left_ax')
     assert not hasattr(ax, 'right_ax')
     for line in ax.get_lines():
         assert PeriodIndex(line.get_xdata()).freq == 'D'
     for line in ax.left_ax.get_lines():
         assert PeriodIndex(line.get_xdata()).freq == 'D'
Exemple #18
0
    def test_frame_join_tzaware(self):
        """An outer join of two tz-aware frames keeps the shared timezone.

        FIX: ``lrange`` (removed from pandas.compat) is replaced by
        ``list(range(3, 6))``; ``str(tz)`` replaces the pytz-only
        ``.tz.zone`` so the check works with any tz backend.
        """
        test1 = DataFrame(np.zeros((6, 3)),
                          index=date_range("2012-11-15 00:00:00", periods=6,
                                           freq="100L", tz="US/Central"))
        test2 = DataFrame(np.zeros((3, 3)),
                          index=date_range("2012-11-15 00:00:00", periods=3,
                                           freq="250L", tz="US/Central"),
                          columns=list(range(3, 6)))

        result = test1.join(test2, how='outer')
        ex_index = test1.index.union(test2.index)

        tm.assert_index_equal(result.index, ex_index)
        assert str(result.index.tz) == 'US/Central'
Exemple #19
0
    def test_secondary_y_regular_ts_xlim(self):
        """Adding a secondary-y series must extend the shared x-limits to
        the right without moving the left edge (GH 3490)."""
        index_1 = date_range(start='2000-01-01', periods=4, freq='D')
        index_2 = date_range(start='2000-01-05', periods=4, freq='D')
        s1 = Series(1, index=index_1)
        s2 = Series(2, index=index_2)

        ax = s1.plot()
        before = ax.get_xlim()
        s2.plot(secondary_y=True, ax=ax)
        after = ax.get_xlim()

        # left edge unchanged, right edge pushed out by the later series
        assert before[0] == after[0]
        assert before[1] < after[1]
Exemple #20
0
def test_resample_size():
    """resample().size() must match a direct bincount over the bins."""
    n = 10000
    dr = date_range('2015-09-19', periods=n, freq='T')
    ts = Series(np.random.randn(n), index=np.random.choice(dr, n))

    left = ts.resample('7T').size()
    ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')

    # bin 0 of the bincount collects pre-range points, so drop it
    bins = np.searchsorted(ix.values, ts.index.values, side='right')
    counts = np.bincount(bins, minlength=len(ix) + 1)[1:]

    right = Series(counts.astype('int64', copy=False), index=ix)
    assert_series_equal(left, right)
 def test_high_freq(self):
     """Millisecond and microsecond frequency series must plot cleanly."""
     for freq in ('ms', 'us'):
         _, ax = self.plt.subplots()
         rng = date_range('1/1/2012', periods=100000, freq=freq)
         ser = Series(np.random.randn(len(rng)), rng)
         _check_plot_works(ser.plot, ax=ax)
 def test_fill_method_and_how_upsample(self):
     """A monthly ffill equals ffill followed by a monthly last()
     (GH 2073)."""
     s = Series(np.arange(9, dtype='int64'),
                index=date_range('2010-01-01', periods=9, freq='Q'))
     filled = s.resample('M').ffill()
     chained = s.resample('M').ffill().resample('M').last().astype('int64')
     assert_series_equal(filled, chained)
 def test_fake_inferred_business(self):
     """A gap in an otherwise daily series must leave the axes without an
     inferred frequency."""
     _, ax = self.plt.subplots()
     rng = date_range('2001-1-1', '2001-1-10')
     ts = Series(lrange(len(rng)), rng)
     gapped = ts[:3].append(ts[5:])
     gapped.plot(ax=ax)
     assert not hasattr(ax, 'freq')
    def test_plot_offset_freq(self):
        """Series on offset-based (e.g. BQS) frequencies must be plottable."""
        _check_plot_works(tm.makeTimeSeries().plot)

        dr = date_range(tm.makeTimeSeries().index[0], freq='BQS', periods=10)
        ser = Series(np.random.randn(len(dr)), dr)
        _check_plot_works(ser.plot)
    def test_series_append_dst(self):
        """Concatenating two US/Eastern series across a DST change keeps the
        timezone intact.

        FIX: ``Series.append`` was deprecated and removed from pandas;
        ``pd.concat`` is the documented, behavior-identical replacement.
        """
        rng1 = date_range('1/1/2016 01:00', periods=3, freq='H',
                          tz='US/Eastern')
        rng2 = date_range('8/1/2016 01:00', periods=3, freq='H',
                          tz='US/Eastern')
        ser1 = Series([1, 2, 3], index=rng1)
        ser2 = Series([10, 11, 12], index=rng2)
        ts_result = pd.concat([ser1, ser2])

        exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00',
                                   '2016-01-01 03:00', '2016-08-01 01:00',
                                   '2016-08-01 02:00', '2016-08-01 03:00'],
                                  tz='US/Eastern')
        exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz
Exemple #26
0
def test_nearest_upsample_with_limit():
    """nearest(limit=...) while upsampling must agree with an equivalent
    reindex(method='nearest', limit=...)."""
    index = date_range('1/1/2000', periods=3, freq='5t')
    series = Series(np.random.randn(len(index)), index)

    upsampled = series.resample('t').nearest(limit=2)
    assert_series_equal(
        upsampled,
        series.reindex(upsampled.index, method='nearest', limit=2))
    def test_mixed_freq_shared_ax(self):
        """Two monthly series on shared (or twinned) axes must line up on
        the same first x-coordinate (GH 13341)."""
        idx1 = date_range('2015-01-01', periods=3, freq='M')
        idx2 = idx1[:1].union(idx1[2:])
        s1 = Series(range(len(idx1)), idx1)
        s2 = Series(range(len(idx2)), idx2)

        # using sharex=True
        fig, (ax1, ax2) = self.plt.subplots(nrows=2, sharex=True)
        s1.plot(ax=ax1)
        s2.plot(ax=ax2)

        assert ax1.freq == 'M'
        assert ax2.freq == 'M'
        first_x = ax1.lines[0].get_xydata()[0, 0]
        assert first_x == ax2.lines[0].get_xydata()[0, 0]

        # using twinx
        fig, ax1 = self.plt.subplots()
        ax2 = ax1.twinx()
        s1.plot(ax=ax1)
        s2.plot(ax=ax2)

        first_x = ax1.lines[0].get_xydata()[0, 0]
        assert first_x == ax2.lines[0].get_xydata()[0, 0]
    def test_secondary_y_ts(self):
        """secondary_y on a time series: data round-trips through the line,
        ticks sit on the right, and the hidden primary axis stays hidden."""
        idx = date_range('1/1/2000', periods=10)
        ser = Series(np.random.randn(10), idx)
        ser2 = Series(np.random.randn(10), idx)

        fig, _ = self.plt.subplots()
        ax = ser.plot(secondary_y=True)
        assert hasattr(ax, 'left_ax')
        assert not hasattr(ax, 'right_ax')
        axes = fig.get_axes()
        line = ax.get_lines()[0]
        round_trip = Series(line.get_ydata(), line.get_xdata()).to_timestamp()
        assert_series_equal(ser, round_trip)
        assert ax.get_yaxis().get_ticks_position() == 'right'
        assert not axes[0].get_yaxis().get_visible()
        self.plt.close(fig)

        _, ax2 = self.plt.subplots()
        ser2.plot(ax=ax2)
        assert (ax2.get_yaxis().get_ticks_position() ==
                self.default_tick_position)
        self.plt.close(ax2.get_figure())

        # a later secondary plot must not hide the visible primary axis
        ax = ser2.plot()
        ax2 = ser.plot(secondary_y=True)
        assert ax.get_yaxis().get_visible()
    def test_tz_aware_asfreq(self, tz):
        """asfreq on a tz-aware daily series must not raise."""
        dr = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz)
        ser = Series(np.random.randn(len(dr)), index=dr)
        # smoke test: only checks that upsampling to minutes does not raise
        ser.asfreq('T')
 def test_series_truncate_datetimeindex_tz(self):
     """truncate with naive datetime bounds on a tz-aware index (GH 9243)."""
     idx = date_range('4/1/2005', '4/30/2005', freq='D', tz='US/Pacific')
     series = Series(range(len(idx)), index=idx)
     truncated = series.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
     tm.assert_series_equal(truncated, Series([1, 2, 3], index=idx[1:4]))
Exemple #31
0
 def _simple_date_range_series(start, end, freq="D"):
     """Return a Series of standard-normal draws indexed by
     ``date_range(start, end, freq=freq)``."""
     idx = date_range(start, end, freq=freq)
     return Series(np.random.randn(len(idx)), index=idx)
from collections import OrderedDict
from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Series
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range

# Shared fixture data for the resample tests below: a minutely index over
# the first ten days of 2005, a random series on it, and a frame mixing
# that series with a plain integer column.
dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")

test_series = Series(np.random.rand(len(dti)), dti)
_test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))})


@pytest.fixture
def test_frame():
    # Fresh copy per test so in-test mutations don't leak across tests.
    return _test_frame.copy()


def test_str():
    """The resampler repr shows freq, closed/label sides, convention and
    origin — for both the default and an explicit origin."""
    r = test_series.resample("H")
    assert (
        "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
        "label=left, convention=start, origin=start_day]" in str(r)
    )

    # BUG FIX: the original assigned this resampler and never asserted
    # anything about it, leaving the explicit-origin repr untested
    r = test_series.resample("H", origin="2000-01-01")
    assert (
        "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
        "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r)
    )
def test_agg():
    """Aggregation specs (callables, lists, dicts, nested renamers) must
    behave identically across all Resampler APIs and TimeGrouper."""
    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )
    r = df.resample("2D")
    # the same 2-day grouping expressed four equivalent ways
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    a_mean = r["A"].mean()
    a_std = r["A"].std()
    a_sum = r["A"].sum()
    b_mean = r["B"].mean()
    b_std = r["B"].std()
    b_sum = r["B"].sum()

    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
    for t in cases:
        result = t.aggregate([np.mean, np.std])
        tm.assert_frame_equal(result, expected)

    expected = pd.concat([a_mean, b_std], axis=1)
    for t in cases:
        result = t.aggregate({"A": np.mean, "B": np.std})
        tm.assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([a_mean, a_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
    for t in cases:
        result = t.aggregate({"A": ["mean", "std"]})
        tm.assert_frame_equal(result, expected)

    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = ["mean", "sum"]
    for t in cases:
        result = t["A"].aggregate(["mean", "sum"])
        # BUG FIX: this assert was dedented outside the loop, so only the
        # last case was actually checked
        tm.assert_frame_equal(result, expected)

    msg = "nested renamer is not supported"
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t.aggregate({"A": {"mean": "mean", "sum": "sum"}})

    expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
    )
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t.aggregate(
                {
                    "A": {"mean": "mean", "sum": "sum"},
                    "B": {"mean2": "mean", "sum2": "sum"},
                }
            )

    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
    )
    for t in cases:
        result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
        tm.assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [
            ("r1", "A", "mean"),
            ("r1", "A", "sum"),
            ("r2", "B", "mean"),
            ("r2", "B", "sum"),
        ]
    )
    # BUG FIX: the original block ended right after building ``expected``;
    # the matching top-level-renamer case (which must raise) was missing
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t[["A", "B"]].agg(
                OrderedDict(
                    [("r1", {"A": ["mean", "sum"]}), ("r2", {"B": ["mean", "sum"]})]
                )
            )
Exemple #34
0
def test_agg_misc():
    """Misc aggregation specs (lambdas, renamers, per-column hows) must
    behave identically across all Resampler APIs and TimeGrouper."""
    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D')
    index.name = 'date'
    df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
                                              names=['index', 'date'])

    r = df.resample('2D')
    # the same 2-day grouping expressed four equivalent ways
    cases = [
        r,
        df_col.resample('2D', on='date'),
        df_mult.resample('2D', level='date'),
        df.groupby(pd.Grouper(freq='2D'))
    ]

    # passed lambda
    for t in cases:
        result = t.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)})
        rcustom = t['B'].apply(lambda x: np.std(x, ddof=1))
        expected = pd.concat([r['A'].sum(), rcustom], axis=1)
        assert_frame_equal(result, expected, check_like=True)

    # agg with renamers
    # BUG FIX: the original built ``expected`` from the leaked loop
    # variable ``t`` left over from the previous loop; use the plain
    # resampler ``r`` (all cases are equivalent by construction)
    expected = pd.concat(
        [r['A'].sum(), r['B'].sum(), r['A'].mean(), r['B'].mean()], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'),
                                                  ('result1', 'B'),
                                                  ('result2', 'A'),
                                                  ('result2', 'B')])

    for t in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t[['A', 'B']].agg(
                OrderedDict([('result1', np.sum), ('result2', np.mean)]))
        assert_frame_equal(result, expected, check_like=True)

    # agg with different hows
    # BUG FIX: same leaked-``t`` issue as above — build from ``r``
    expected = pd.concat(
        [r['A'].sum(), r['A'].std(), r['B'].mean(), r['B'].std()], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), ('A', 'std'),
                                                  ('B', 'mean'), ('B', 'std')])
    for t in cases:
        result = t.agg(
            OrderedDict([('A', ['sum', 'std']), ('B', ['mean', 'std'])]))
        assert_frame_equal(result, expected, check_like=True)

    # equivalent of using a selection list / or not
    for t in cases:
        result = t[['A', 'B']].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']})
        assert_frame_equal(result, expected, check_like=True)

    # series like aggs
    for t in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t['A'].agg({'A': ['sum', 'std']})
        expected = pd.concat([t['A'].sum(), t['A'].std()], axis=1)
        expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
                                                      ('A', 'std')])
        assert_frame_equal(result, expected, check_like=True)

        expected = pd.concat(
            [t['A'].agg(['sum', 'std']), t['A'].agg(['mean', 'std'])], axis=1)
        expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
                                                      ('A', 'std'),
                                                      ('B', 'mean'),
                                                      ('B', 'std')])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t['A'].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']})
        assert_frame_equal(result, expected, check_like=True)

    # errors
    # invalid names in the agg specification
    msg = "\"Column 'B' does not exist!\""
    for t in cases:
        with pytest.raises(KeyError, match=msg):
            t[['A']].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']})
import pytest

import pandas.util._test_decorators as td
from pandas.util._test_decorators import async_mark

import pandas as pd
from pandas import DataFrame, Series, Timestamp, compat
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range

# Module-level fixture data: three runs of "A" labels over a 40-second index.
# NOTE(review): this name shadows the ``test_frame`` pytest fixture defined
# earlier in this file — presumably the two sections were merged from
# different original modules; confirm before relying on either.
test_frame = DataFrame(
    {
        "A": [1] * 20 + [2] * 12 + [3] * 8,
        "B": np.arange(40)
    },
    index=date_range("1/1/2000", freq="s", periods=40),
)


@async_mark()
@td.check_file_leaks
async def test_tab_complete_ipython6_warning(ip):
    """Smoke test: building a resampler inside an IPython session works.

    NOTE(review): only the setup code is visible here — the completion
    call that uses ``provisionalcompleter`` does not appear; confirm
    against the original test before relying on its coverage.

    FIX: ``dedent`` is used but not imported in this file's visible import
    block; import it locally so the function is self-contained.
    """
    from textwrap import dedent
    from IPython.core.completer import provisionalcompleter

    code = dedent("""\
    import pandas._testing as tm
    s = tm.makeTimeSeries()
    rs = s.resample("D")
    """)
    await ip.run_code(code)
# Exemple #36
# 0
    def __call__(self):
        """
        Compute millisecond-resolution tick locations (as matplotlib date
        numbers) for the axis' current view limits.

        Returns an empty list when no data have been set, and falls back to
        the bare view limits if building the full date_range fails.
        """
        # if no data have been set, this will tank with a ValueError
        try:
            dmin, dmax = self.viewlim_to_dt()
        except ValueError:
            return []

        # We need to cap at the endpoints of valid datetime
        nmax, nmin = dates.date2num((dmax, dmin))

        # span of the view in milliseconds (date2num values are in days)
        num = (nmax - nmin) * 86400 * 1000
        max_millis_ticks = 6
        for interval in [1, 10, 50, 100, 200, 500]:
            if num <= interval * (max_millis_ticks - 1):
                self._interval = interval
                break
        else:
            # We went through the whole loop without breaking, default to 1000.
            # (The original attached this ``else`` to the inner ``if``; the
            # final value of _interval was the same, but the comment described
            # a for/else -- restored so the code says what the comment means.)
            self._interval = 1000.0

        estimate = (nmax - nmin) / (self._get_unit() * self._get_interval())

        if estimate > self.MAXTICKS * 2:
            # BUG FIX: ``estimate`` is a float, so the previous ``:d`` format
            # spec raised "ValueError: Unknown format code 'd'" here instead
            # of the intended RuntimeError; truncate explicitly first.
            raise RuntimeError(
                "MillisecondLocator estimated to generate "
                f"{int(estimate):d} ticks from {dmin} to {dmax}: exceeds Locator.MAXTICKS"
                f"* 2 ({self.MAXTICKS * 2:d}) "
            )

        interval = self._get_interval()
        freq = f"{interval}L"
        tz = self.tz.tzname(None)
        st = _from_ordinal(dates.date2num(dmin))  # strip tz
        ed = _from_ordinal(dates.date2num(dmax))
        all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).astype(object)

        try:
            if len(all_dates) > 0:
                locs = self.raise_if_exceeds(dates.date2num(all_dates))
                return locs
        except Exception:  # pragma: no cover
            pass

        # fall back to just the endpoints of the view
        lims = dates.date2num([dmin, dmax])
        return lims
# Exemple #37
# 0
    def test_resample_tz_localized(self):
        """Resampling of tz-aware series/frames.

        Covers weekly and daily means on a tz-converted series, boundary
        labelling across a tz offset (GH 2245), ``kind="period"`` output,
        and dict aggregation on a localized 20k-row frame (GH 6397).
        """
        dr = date_range(start="2012-4-13", end="2012-5-1")
        ts = Series(range(len(dr)), index=dr)

        ts_utc = ts.tz_localize("UTC")
        ts_local = ts_utc.tz_convert("America/Los_Angeles")

        result = ts_local.resample("W").mean()

        # build the expected result from tz-naive wall-clock times and
        # localize afterwards
        ts_local_naive = ts_local.copy()
        ts_local_naive.index = [
            x.replace(tzinfo=None)
            for x in ts_local_naive.index.to_pydatetime()
        ]

        exp = ts_local_naive.resample("W").mean().tz_localize(
            "America/Los_Angeles")
        exp.index = pd.DatetimeIndex(exp.index, freq="W")

        tm.assert_series_equal(result, exp)

        # it works
        result = ts_local.resample("D").mean()

        # #2245
        idx = date_range("2001-09-20 15:59",
                         "2001-09-20 16:00",
                         freq="T",
                         tz="Australia/Sydney")
        s = Series([1, 2], index=idx)

        # both points fall into the right-closed daily bin labelled 09-21
        result = s.resample("D", closed="right", label="right").mean()
        ex_index = date_range("2001-09-21",
                              periods=1,
                              freq="D",
                              tz="Australia/Sydney")
        expected = Series([1.5], index=ex_index)

        tm.assert_series_equal(result, expected)

        # for good measure
        result = s.resample("D", kind="period").mean()
        ex_index = period_range("2001-09-20", periods=1, freq="D")
        expected = Series([1.5], index=ex_index)
        tm.assert_series_equal(result, expected)

        # GH 6397
        # comparing an offset that doesn't propagate tz's
        rng = date_range("1/1/2011", periods=20000, freq="H")
        rng = rng.tz_localize("EST")
        ts = DataFrame(index=rng)
        ts["first"] = np.random.randn(len(rng))
        ts["second"] = np.cumsum(np.random.randn(len(rng)))
        expected = DataFrame(
            {
                "first": ts.resample("A").sum()["first"],
                "second": ts.resample("A").mean()["second"],
            },
            columns=["first", "second"],
        )
        result = (ts.resample("A").agg({
            "first": np.sum,
            "second": np.mean
        }).reindex(columns=["first", "second"]))
        tm.assert_frame_equal(result, expected)
from datetime import datetime
from operator import methodcaller

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Series, Timestamp
import pandas._testing as tm
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range

# Module-level fixture: 1000 daily Gaussian-noise observations starting at
# 2000-01-01, shared by the tests below.
test_series = Series(
    np.random.randn(1000),
    index=date_range("1/1/2000", periods=1000),
)


def test_apply():
    """Grouping via Grouper(freq='A') matches an explicit by-year groupby."""

    def top_three(chunk):
        # the three largest values of the chunk, in ascending order
        return chunk.sort_values()[-3:]

    annual_grouper = Grouper(freq="A", label="right", closed="right")
    applied = test_series.groupby(annual_grouper).apply(top_three)
    expected = test_series.groupby(lambda ts: ts.year).apply(top_three)

    # drop the outer (group) level so both sides share a plain datetime index
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
# Exemple #39
# 0
def interval_range(
    start=None, end=None, periods=None, freq=None, name=None, closed="right"
):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` are supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # either bound determines numeric vs datetime-like handling below
    endpoint = start if start is not None else end

    # default freq (1 or 'D') only when it is the omitted fourth parameter
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_numeric(breaks, np.dtype("int64"))
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def test_agg_misc():
    """Exercise ``.agg`` across the Resampler/Grouper APIs.

    Covers lambdas mixed with numpy reducers, dict-of-renamers (now an
    error), per-column lists of hows, nested series-like renamers (now an
    error), and invalid column names in the spec.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)  # deterministic fixture data
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )

    r = df.resample("2D")
    # four equivalent ways of grouping the same data every two days
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    # passed lambda
    for t in cases:
        result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
        rcustom = t["B"].apply(lambda x: np.std(x, ddof=1))
        expected = pd.concat([r["A"].sum(), rcustom], axis=1)
        tm.assert_frame_equal(result, expected, check_like=True)

    # agg with renamers
    # NOTE(review): ``t`` below is the loop variable leaked from the previous
    # for-loop (i.e. the last case); this ``expected`` is never compared --
    # only the raised SpecificationError is checked.
    expected = pd.concat(
        [t["A"].sum(), t["B"].sum(), t["A"].mean(), t["B"].mean()], axis=1
    )
    expected.columns = pd.MultiIndex.from_tuples(
        [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")]
    )

    msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t[["A", "B"]].agg(OrderedDict([("result1", np.sum), ("result2", np.mean)]))

    # agg with different hows
    # NOTE(review): ``t`` is again the leaked last case; presumably all cases
    # aggregate identically, so the expected frame matches every case.
    expected = pd.concat(
        [t["A"].sum(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1
    )
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
    )
    for t in cases:
        result = t.agg(OrderedDict([("A", ["sum", "std"]), ("B", ["mean", "std"])]))
        tm.assert_frame_equal(result, expected, check_like=True)

    # equivalent of using a selection list / or not
    for t in cases:
        result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
        tm.assert_frame_equal(result, expected, check_like=True)

    msg = "nested renamer is not supported"

    # series like aggs
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t["A"].agg({"A": ["sum", "std"]})

        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})

    # errors
    # invalid names in the agg specification
    msg = "\"Column 'B' does not exist!\""
    for t in cases:
        with pytest.raises(KeyError, match=msg):
            t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
from textwrap import dedent

import numpy as np

import pandas as pd
from pandas import DataFrame, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal

# Module-level fixture: 40 one-second observations starting 2000-01-01;
# 'A' carries three run-length group labels (runs of 1, 2 and 3) and 'B'
# counts 0..39.
test_frame = DataFrame(
    {'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)},
    index=date_range('1/1/2000', freq='s', periods=40),
)


def test_tab_complete_ipython6_warning(ip):
    """Tab-completing a Resampler inside IPython must emit no warnings."""
    from IPython.core.completer import provisionalcompleter

    # create a Resampler object ``rs`` inside the IPython kernel
    setup_code = dedent("""\
    import pandas.util.testing as tm
    s = tm.makeTimeSeries()
    rs = s.resample("D")
    """)
    ip.run_code(setup_code)

    # completing on ``rs.`` should be entirely warning-free
    with tm.assert_produces_warning(None), provisionalcompleter('ignore'):
        list(ip.Completer.completions('rs.', 1))
# Exemple #42
# 0
def test_agg():
    """Exercise ``.aggregate`` across the Resampler/Grouper APIs.

    Covers list-of-functions, dict specs, per-column lists of hows, and the
    deprecated nested-renamer dict form (asserted to raise FutureWarning).

    NOTE(review): the final ``expected`` (3-level 'r1'/'r2' columns) is built
    but never asserted against -- this copy of the test appears truncated.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)  # deterministic fixture data
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D')
    index.name = 'date'
    df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
                                              names=['index', 'date'])
    r = df.resample('2D')
    # four equivalent ways of grouping the same data every two days
    cases = [
        r,
        df_col.resample('2D', on='date'),
        df_mult.resample('2D', level='date'),
        df.groupby(pd.Grouper(freq='2D'))
    ]

    # reference aggregations computed once on the plain resampler
    a_mean = r['A'].mean()
    a_std = r['A'].std()
    a_sum = r['A'].sum()
    b_mean = r['B'].mean()
    b_std = r['B'].std()
    b_sum = r['B'].sum()

    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_product([['A', 'B'], ['mean',
                                                                'std']])
    for t in cases:
        result = t.aggregate([np.mean, np.std])
        assert_frame_equal(result, expected)

    expected = pd.concat([a_mean, b_std], axis=1)
    for t in cases:
        result = t.aggregate({'A': np.mean, 'B': np.std})
        assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([a_mean, a_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'std')])
    for t in cases:
        result = t.aggregate({'A': ['mean', 'std']})
        assert_frame_equal(result, expected)

    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = ['mean', 'sum']
    for t in cases:
        result = t['A'].aggregate(['mean', 'sum'])
        # BUG FIX: this assertion previously sat outside the loop, so only
        # the last case was actually verified; check every case.
        assert_frame_equal(result, expected)

    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum')])
    for t in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
        assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum'),
                                                  ('B', 'mean2'),
                                                  ('B', 'sum2')])
    for t in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t.aggregate({
                'A': {
                    'mean': 'mean',
                    'sum': 'sum'
                },
                'B': {
                    'mean2': 'mean',
                    'sum2': 'sum'
                }
            })
        assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'std'),
                                                  ('B', 'mean'), ('B', 'std')])
    for t in cases:
        result = t.aggregate({'A': ['mean', 'std'], 'B': ['mean', 'std']})
        assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'),
                                                  ('r1', 'A', 'sum'),
                                                  ('r2', 'B', 'mean'),
                                                  ('r2', 'B', 'sum')])
# Exemple #43
# 0
from datetime import datetime
from operator import methodcaller

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Panel, Series
from pandas.core.indexes.datetimes import date_range
from pandas.core.resample import TimeGrouper
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal

# Module-level fixture: 1000 daily random observations beginning 2000-01-01,
# shared by the tests below.
test_series = Series(
    np.random.randn(1000),
    index=date_range('1/1/2000', periods=1000),
)


def test_apply():
    """Grouping via the deprecated pd.TimeGrouper matches a by-year groupby.

    Constructing ``pd.TimeGrouper`` itself raises a FutureWarning, which is
    asserted explicitly.
    """
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        grouper = pd.TimeGrouper(freq='A', label='right', closed='right')

    grouped = test_series.groupby(grouper)

    def f(x):
        # the three largest values per group, in ascending order
        return x.sort_values()[-3:]

    applied = grouped.apply(f)
    expected = test_series.groupby(lambda x: x.year).apply(f)

    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    # BUG FIX: the computed results were never compared; assert equality as
    # the Grouper-based variant of this test does.
    tm.assert_series_equal(applied, expected)
# Exemple #44
# 0
    def test_string_index_alias_tz_aware(self, tz):
        """Partial-string label lookup works on a tz-aware datetime index."""
        idx = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.randn(len(idx)), index=idx)

        # '1/3/2000' names the third point (positions 0..2)
        looked_up = ser["1/3/2000"]
        tm.assert_almost_equal(looked_up, ser[2])