Example #1
0
def test_resample_upsampling_picked_but_not_correct():
    """Resampling daily data to 'D' must not be treated as an upsample.

    Covers issue #3020 (first label preserved) and GH 5955 (upsampling was
    wrongly chosen when the axis frequency matched the resample frequency).
    """
    # Issue #3020: the first resampled label equals the first input date.
    daily_index = date_range('01-Jan-2014', '05-Jan-2014', freq='D')
    ones = Series(1, index=daily_index)
    daily_mean = ones.resample('D').mean()
    assert daily_mean.index[0] == daily_index[0]

    # GH 5955: one noon timestamp per day, resampled to daily bins.
    noon_stamps = [datetime(1975, 1, day, 12, 0) for day in range(1, 6)]
    s = Series(np.arange(1., 6), index=noon_stamps)
    midnight_index = date_range('19750101', periods=5, freq='D')
    expected = Series(np.arange(1., 6), index=midnight_index)

    counts = s.resample('D').count()
    assert_series_equal(counts, Series(1, index=expected.index))

    # Each bin holds exactly one value, so sum and mean both return it.
    summed = s.resample('D').sum()
    averaged = s.resample('D').mean()
    for reduced in (summed, averaged):
        assert_series_equal(reduced, expected)
Example #2
0
    def test_resample_upsampling_picked_but_not_correct(self):
        """Resampling daily data to 'D' must not be treated as an upsample.

        Covers issue #3020 and GH 5955, using the ``how=`` resample signature.
        """
        import datetime

        # Issue #3020: the first resampled label equals the first input date.
        daily_index = date_range('01-Jan-2014','05-Jan-2014', freq='D')
        ones = Series(1, index=daily_index)
        daily = ones.resample('D')
        self.assertEqual(daily.index[0], daily_index[0])

        # GH 5955: upsampling was wrongly chosen when the axis frequency
        # already matched the resample frequency.
        noon_stamps = [datetime.datetime(1975, 1, day, 12, 0)
                       for day in range(1, 6)]
        s = Series(np.arange(1.,6), index=noon_stamps)
        midnight_index = date_range('19750101', periods=5, freq='D')
        expected = Series(np.arange(1.,6), index=midnight_index)

        counts = s.resample('D', how='count')
        assert_series_equal(counts, Series(1, index=expected.index))

        # One value per bin: sum, mean and the default reduction all agree.
        by_sum = s.resample('D', how='sum')
        by_mean = s.resample('D', how='mean')
        by_default = s.resample('D')
        for reduced in (by_sum, by_mean, by_default):
            assert_series_equal(reduced, expected)
 def test_resample_bms_2752(self):
     # GH2753
     foo = Series(index=pd.bdate_range('20000101', '20000201'))
     res1 = foo.resample("BMS").mean()
     res2 = foo.resample("BMS").mean().resample("B").mean()
     assert res1.index[0] == Timestamp('20000103')
     assert res1.index[0] == res2.index[0]
    def test_resample_loffset(self):
        """Check that ``loffset`` shifts the labels of a resampled series.

        The one-minute shift is passed as a ``timedelta``, a frequency string
        and a ``Minute`` offset; all three must give the same result. A second
        check shifts weekly labels by a negative ``bday`` offset.
        """
        rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
        s = Series(np.random.randn(14), index=rng)

        result = s.resample('5min', how='mean', closed='right', label='right',
                            loffset=timedelta(minutes=1))
        idx = date_range('1/1/2000', periods=4, freq='5min')
        expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                          index=idx + timedelta(minutes=1))
        assert_series_equal(result, expected)

        # The same shift spelled as a string and as an offset object.
        expected = s.resample(
            '5min', how='mean', closed='right', label='right',
            loffset='1min')
        assert_series_equal(result, expected)

        expected = s.resample(
            '5min', how='mean', closed='right', label='right',
            loffset=Minute(1))
        assert_series_equal(result, expected)

        # assertEqual replaces the deprecated ``assert_`` alias and reports
        # both operands when the check fails.
        self.assertEqual(result.index.freq, Minute(5))

        # from daily
        dti = DatetimeIndex(
            start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
            freq='D')
        ser = Series(np.random.rand(len(dti)), dti)

        # to weekly
        result = ser.resample('w-sun', how='last')
        expected = ser.resample('w-sun', how='last', loffset=-bday)
        self.assertEqual(result.index[0] - bday, expected.index[0])
def slide12():
    """Print a 100-day random daily series and its 'M' resample by mean."""
    rng = pd.date_range('1/1/2000', periods=100, freq='D')
    ts = Series(np.random.randn(len(rng)), index=rng)
    # Call-style print with a single argument prints the same text on
    # Python 2 and Python 3; the statement form is a syntax error on 3.
    print('timeseries')
    print(ts)
    print('resample')
    print(ts.resample('M', how='mean'))
 def test_fill_method_and_how_upsample(self):
     # GH2073
     s = Series(np.arange(9, dtype='int64'),
                index=date_range('2010-01-01', periods=9, freq='Q'))
     last = s.resample('M').ffill()
     both = s.resample('M').ffill().resample('M').last().astype('int64')
     assert_series_equal(last, both)
Example #7
0
    def test_resample_loffset(self):
        """Verify ``loffset`` label shifting for timedelta, string and offset inputs."""
        minutes = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
        s = Series(np.random.randn(14), index=minutes)
        # Arguments shared by every 5-minute resample below.
        common = dict(how="mean", closed="right", label="right")

        result = s.resample("5min", loffset=timedelta(minutes=1), **common)
        shifted_labels = date_range("1/1/2000", periods=4, freq="5min") + timedelta(minutes=1)
        bin_means = [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()]
        assert_series_equal(result, Series(bin_means, index=shifted_labels))

        # A frequency string and an offset object must shift identically.
        from_string = s.resample("5min", loffset="1min", **common)
        assert_series_equal(result, from_string)

        from_offset = s.resample("5min", loffset=Minute(1), **common)
        assert_series_equal(result, from_offset)

        self.assertEqual(result.index.freq, Minute(5))

        # from daily
        days = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
        ser = Series(np.random.rand(len(days)), days)

        # to weekly: a negative offset moves every label back by ``bday``
        unshifted = ser.resample("w-sun", how="last")
        shifted = ser.resample("w-sun", how="last", loffset=-bday)
        self.assertEqual(unshifted.index[0] - bday, shifted.index[0])
Example #8
0
    def test_resample_tz_localized(self):
        """Resampling tz-aware data must match resampling the naive local data."""
        days = date_range(start="2012-4-13", end="2012-5-1")
        ts = Series(lrange(len(days)), days)

        local_tz = "America/Los_Angeles"
        ts_utc = ts.tz_localize("UTC")
        ts_local = ts_utc.tz_convert(local_tz)

        result = ts_local.resample("W")

        # Drop the tzinfo, resample the naive series, then re-attach the zone.
        naive = ts_local.copy()
        naive.index = [stamp.replace(tzinfo=None) for stamp in naive.index.to_pydatetime()]
        exp = naive.resample("W").tz_localize(local_tz)
        assert_series_equal(result, exp)

        # it works
        result = ts_local.resample("D")

        # #2245
        sydney = "Australia/Sydney"
        minute_index = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz=sydney)
        s = Series([1, 2], index=minute_index)

        result = s.resample("D", closed="right", label="right")
        day_label = date_range("2001-09-21", periods=1, freq="D", tz=sydney)
        assert_series_equal(result, Series([1.5], index=day_label))

        # for good measure
        result = s.resample("D", kind="period")
        day_period = period_range("2001-09-20", periods=1, freq="D")
        assert_series_equal(result, Series([1.5], index=day_period))
Example #9
0
    def test_resample_upsampling_picked_but_not_correct(self):
        """Resampling daily data to 'D' must not be treated as an upsample.

        Covers issue #3020 (first label preserved) and GH 5955 (upsampling
        was wrongly chosen when the axis frequency matched the resample
        frequency).
        """

        # Test for issue #3020
        dates = date_range("01-Jan-2014", "05-Jan-2014", freq="D")
        series = Series(1, index=dates)

        result = series.resample("D")
        # assertEqual: the ``assertEquals`` spelling is a deprecated alias
        # that recent Python versions no longer provide.
        self.assertEqual(result.index[0], dates[0])

        # GH 5955
        # incorrect deciding to upsample when the axis frequency matches the resample frequency

        import datetime

        s = Series(np.arange(1.0, 6), index=[datetime.datetime(1975, 1, i, 12, 0) for i in range(1, 6)])
        expected = Series(np.arange(1.0, 6), index=date_range("19750101", periods=5, freq="D"))

        result = s.resample("D", how="count")
        assert_series_equal(result, Series(1, index=expected.index))

        # One value per daily bin, so sum, mean and the default all match.
        result1 = s.resample("D", how="sum")
        result2 = s.resample("D", how="mean")
        result3 = s.resample("D")
        assert_series_equal(result1, expected)
        assert_series_equal(result2, expected)
        assert_series_equal(result3, expected)
    def test_weekly_resample_buglet(self):
        """#1327: the bare 'W' rule must resample exactly like 'W-SUN'."""
        # #1327
        business_days = date_range('1/1/2000', freq='B', periods=20)
        ts = Series(np.random.randn(len(business_days)), index=business_days)

        by_alias = ts.resample('W')
        by_anchor = ts.resample('W-SUN')
        assert_series_equal(by_alias, by_anchor)
Example #11
0
def resamplingTs():
    """Print a monthly period series and three 'Q-DEC' resamplings of it.

    The resamplings use ``how='mean'`` and the callables
    ``resamplingFunction`` and ``returnFunction``.
    """
    monthly = Series(np.arange(13),index=pd.period_range('2010Q1','2011Q1',freq='M'))
    print (monthly)

    def show_quarterly(how):
        # Resample to quarters with the given reducer and print the result.
        print (monthly.resample('Q-DEC', how=how))

    show_quarterly('mean')
    show_quarterly(resamplingFunction)
    show_quarterly(returnFunction)
Example #12
0
class ResampleDatetetime64:
    """Resample case for a series whose values are datetime64[ns] (GH 7754)."""
    # GH 7754
    def setup(self):
        """Build ``self.dt_ts`` over a ten-hour range at '555000U' spacing."""
        index = date_range(start='2000-01-01 00:00:00',
                           end='2000-01-01 10:00:00', freq='555000U')
        self.dt_ts = Series(5, index, dtype='datetime64[ns]')

    def time_resample(self):
        """Resample to '1S' and take the last value in each bin."""
        per_second = self.dt_ts.resample('1S')
        per_second.last()
Example #13
0
class TestTimeGrouper(unittest.TestCase):
    """Compare TimeGrouper / resample results with plain groupby equivalents."""

    def setUp(self):
        """Create a 1000-point random series on a default-frequency date range."""
        self.ts = Series(np.random.randn(1000),
                         index=date_range('1/1/2000', periods=1000))

    def test_apply(self):
        """Applying through an 'A' TimeGrouper matches grouping by year."""
        grouper = TimeGrouper('A', label='right', closed='right')

        grouped = self.ts.groupby(grouper)

        # Keep the last three entries of each group after ordering.
        # NOTE(review): Series.order() is the legacy spelling; newer pandas
        # uses sort_values() -- confirm against the targeted pandas version.
        f = lambda x: x.order()[-3:]

        applied = grouped.apply(f)
        expected = self.ts.groupby(lambda x: x.year).apply(f)

        # The outer level holds the group key, which differs between the two
        # groupings, so only the inner level is compared.
        applied.index = applied.index.droplevel(0)
        expected.index = expected.index.droplevel(0)
        assert_series_equal(applied, expected)

    def test_count(self):
        """An 'A' resample with ``how='count'`` matches a per-year count."""
        self.ts[::3] = np.nan

        # The unused TimeGrouper local was dropped; the resample call below
        # never referenced it.
        result = self.ts.resample('A', how='count')

        expected = self.ts.groupby(lambda x: x.year).count()
        expected.index = result.index

        assert_series_equal(result, expected)

    def test_numpy_reduction(self):
        """An 'A' resample with ``how='prod'`` matches ``agg(np.prod)`` per year."""
        result = self.ts.resample('A', how='prod', closed='right')

        expected = self.ts.groupby(lambda x: x.year).agg(np.prod)
        expected.index = result.index

        assert_series_equal(result, expected)

    def test_apply_iteration(self):
        """#2300: apply through a TimeGrouper-derived grouper keeps the index."""
        # #2300
        N = 1000
        ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
        df = DataFrame({'open':1, 'close':2}, index=ind)
        tg = TimeGrouper('M')

        grouper = tg.get_grouper(df)

        # Errors

        grouped = df.groupby(grouper, group_keys=False)
        f = lambda df: df['close'] / df['open']

        # it works!
        result = grouped.apply(f)
        self.assertTrue(result.index.equals(df.index))
Example #14
0
    def test_resample_tz_localized(self):
        """Resampling tz-aware data must match resampling the naive local data.

        Also covers #2245 (daily bins in Australia/Sydney) and GH 6397
        (per-column aggregation on a tz-aware frame).
        """
        days = date_range(start='2012-4-13', end='2012-5-1')
        ts = Series(lrange(len(days)), days)

        local_tz = 'America/Los_Angeles'
        ts_utc = ts.tz_localize('UTC')
        ts_local = ts_utc.tz_convert(local_tz)

        result = ts_local.resample('W').mean()

        # Drop the tzinfo, resample the naive series, then re-attach the zone.
        naive = ts_local.copy()
        naive.index = [stamp.replace(tzinfo=None)
                       for stamp in naive.index.to_pydatetime()]
        exp = naive.resample('W').mean().tz_localize(local_tz)
        assert_series_equal(result, exp)

        # it works
        result = ts_local.resample('D').mean()

        # #2245
        sydney = 'Australia/Sydney'
        minute_index = date_range('2001-09-20 15:59', '2001-09-20 16:00',
                                  freq='T', tz=sydney)
        s = Series([1, 2], index=minute_index)

        result = s.resample('D', closed='right', label='right').mean()
        day_label = date_range('2001-09-21', periods=1, freq='D', tz=sydney)
        assert_series_equal(result, Series([1.5], index=day_label))

        # for good measure
        result = s.resample('D', kind='period').mean()
        day_period = period_range('2001-09-20', periods=1, freq='D')
        assert_series_equal(result, Series([1.5], index=day_period))

        # GH 6397
        # comparing an offset that doesn't propagate tz's
        hours = date_range('1/1/2011', periods=20000, freq='H')
        hours = hours.tz_localize('EST')
        frame = DataFrame(index=hours)
        frame['first'] = np.random.randn(len(hours))
        frame['second'] = np.cumsum(np.random.randn(len(hours)))

        wanted = ['first', 'second']
        # Build the expectation one column at a time ...
        annual_sum = frame.resample('A').sum()['first']
        annual_mean = frame.resample('A').mean()['second']
        expected = DataFrame({'first': annual_sum, 'second': annual_mean},
                             columns=wanted)
        # ... and compare with a single dict-driven aggregation.
        aggregated = frame.resample('A').agg({'first': np.sum,
                                              'second': np.mean})
        result = aggregated.reindex(columns=wanted)
        assert_frame_equal(result, expected)
    def test_resample_weekly_all_na(self):
        """Resampling 'W-WED' data to 'W-THU' yields only nulls unless filled."""
        rng = date_range('1/1/2000', periods=10, freq='W-WED')
        ts = Series(np.random.randn(len(rng)), index=rng)

        result = ts.resample('W-THU')

        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(result.isnull().all())

        # With forward fill, all but the last row must match asfreq's
        # forward-filled output.
        result = ts.resample('W-THU', fill_method='ffill')[:-1]
        expected = ts.asfreq('W-THU', method='ffill')
        assert_series_equal(result, expected)
Example #16
0
    def test_resample_weekly_all_na(self):
        """Resampling "W-WED" data to "W-THU" yields only nulls unless filled."""
        rng = date_range("1/1/2000", periods=10, freq="W-WED")
        ts = Series(np.random.randn(len(rng)), index=rng)

        result = ts.resample("W-THU")

        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(result.isnull().all())

        # With forward fill, all but the last row must match asfreq's
        # forward-filled output.
        result = ts.resample("W-THU", fill_method="ffill")[:-1]
        expected = ts.asfreq("W-THU", method="ffill")
        assert_series_equal(result, expected)
Example #17
0
    def test_secondary_y_mixed_freq_ts_xlim(self):
        """GH 3490: plotting a daily resample on a secondary y-axis must
        leave the x-limits of the minute-level plot unchanged.
        """
        # GH 3490 - mixed frequency timeseries with secondary y
        minutes = date_range('2000-01-01', periods=10000, freq='min')
        ts = Series(1, index=minutes)

        ax = ts.plot()
        limits_before = ax.get_xlim()
        ts.resample('D').plot(secondary_y=True, ax=ax)
        limits_after = ax.get_xlim()

        left_before, right_before = limits_before
        left_after, right_after = limits_after

        # a downsample should not have changed either limit
        self.assertEqual(left_before, left_after)
        self.assertEqual(right_before, right_after)
Example #18
0
    def test_metadata_propagation_indiv(self):
        """Check that Series metadata matches after transpose, resample and concat.

        The concat check temporarily replaces ``Series._metadata`` and
        ``Series.__finalize__``. The originals are restored in a ``finally``
        block so a failing assertion cannot leave ``Series`` patched for the
        tests that run afterwards.
        """
        # check that the metadata matches up on the resulting ops

        o = Series(range(3), range(3))
        o.name = 'foo'
        o2 = Series(range(3), range(3))
        o2.name = 'bar'

        result = o.T
        self.check_metadata(o, result)

        # resample
        ts = Series(np.random.rand(1000),
                    index=date_range('20130101', periods=1000, freq='s'),
                    name='foo')
        result = ts.resample('1T').mean()
        self.check_metadata(ts, result)

        result = ts.resample('1T').min()
        self.check_metadata(ts, result)

        result = ts.resample('1T').apply(lambda x: x.sum())
        self.check_metadata(ts, result)

        # Remember the class-level hooks before patching them.
        _metadata = Series._metadata
        _finalize = Series.__finalize__
        try:
            Series._metadata = ['name', 'filename']
            o.filename = 'foo'
            o2.filename = 'bar'

            def finalize(self, other, method=None, **kwargs):
                for name in self._metadata:
                    if method == 'concat' and name == 'filename':
                        # Join the filenames of all concatenated objects,
                        # skipping any that are unset or empty.
                        value = '+'.join([getattr(
                            o, name) for o in other.objs
                            if getattr(o, name, None)
                        ])
                        object.__setattr__(self, name, value)
                    else:
                        object.__setattr__(self, name,
                                           getattr(other, name, None))

                return self

            Series.__finalize__ = finalize

            result = pd.concat([o, o2])
            assert result.filename == 'foo+bar'
            assert result.name is None
        finally:
            # reset
            Series._metadata = _metadata
            Series.__finalize__ = _finalize
def date_11():
    """Print examples of date-offset arithmetic and month-end aggregation.

    Shows adding ``Day`` / ``MonthEnd`` offsets to a datetime, rolling a date
    forward and back with ``MonthEnd``, and two month-end means of a random
    series (groupby on ``rollforward`` and an 'M' resample).
    """
    from pandas.tseries.offsets import Day,MonthEnd
    now=datetime(2011,11,17)
    # Call-style print with a single argument prints the same text on
    # Python 2 and Python 3; the statement form is a syntax error on 3.
    print(now+3*Day())
    print(now+MonthEnd())
    print(now+MonthEnd(2))

    offset=MonthEnd()
    print(offset.rollforward(now))
    print(offset.rollback(now))

    ts=Series(np.random.randn(20),index=pd.date_range('1/15/2000',periods=20,freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
    print(ts.resample('M',how='mean'))
Example #20
0
    def test_secondary_y_mixed_freq_ts_xlim(self):
        """GH 3490: plotting a daily mean on a secondary y-axis must leave
        the x-limits of the minute-level plot unchanged.
        """
        # GH 3490 - mixed frequency timeseries with secondary y
        minutes = date_range('2000-01-01', periods=10000, freq='min')
        ts = Series(1, index=minutes)

        _, ax = self.plt.subplots()
        ts.plot(ax=ax)
        limits_before = ax.get_xlim()
        daily_mean = ts.resample('D').mean()
        daily_mean.plot(secondary_y=True, ax=ax)
        limits_after = ax.get_xlim()

        left_before, right_before = limits_before
        left_after, right_after = limits_after

        # a downsample should not have changed either limit
        assert left_before == left_after
        assert right_before == right_after
Example #21
0
def test_resample_base_with_timedeltaindex():
    """GH 10530: check the bin labels produced with and without ``base``
    when resampling a timedelta-indexed series.
    """
    # GH 10530
    seconds = timedelta_range(start='0s', periods=25, freq='s')
    ts = Series(np.random.randn(len(seconds)), index=seconds)

    with_base = ts.resample('2s', base=5).mean()
    without_base = ts.resample('2s').mean()

    # Pair each result with the index it is expected to carry.
    checks = [
        (without_base, timedelta_range(start='0s', end='25s', freq='2s')),
        (with_base, timedelta_range(start='5s', end='29s', freq='2s')),
    ]
    for resampled, expected_index in checks:
        tm.assert_index_equal(resampled.index, expected_index)
Example #22
0
    def test_closed_left_corner(self):
        """#1465: a leading NaN must not change a left-closed 10-minute mean."""
        # #1465
        minute_index = date_range(start='1/1/2012 9:30', freq='1min',
                                  periods=21)
        s = Series(np.random.randn(21), index=minute_index)
        s[0] = np.nan

        # Resampling with and without the NaN head must agree for both
        # label sides.
        for label in ('right', 'left'):
            result = s.resample('10min', how='mean',closed='left',
                                label=label)
            exp = s[1:].resample('10min', how='mean',closed='left',
                                 label=label)
            assert_series_equal(result, exp)
Example #23
0
    def test_resample_tz_localized(self):
        """Resampling tz-aware data must match resampling the naive local data.

        Also covers #2245 (daily bins in Australia/Sydney) and GH 6397
        (per-column ``how`` on a tz-aware frame).
        """
        days = date_range(start="2012-4-13", end="2012-5-1")
        ts = Series(lrange(len(days)), days)

        local_tz = "America/Los_Angeles"
        ts_utc = ts.tz_localize("UTC")
        ts_local = ts_utc.tz_convert(local_tz)

        result = ts_local.resample("W")

        # Drop the tzinfo, resample the naive series, then re-attach the zone.
        naive = ts_local.copy()
        naive.index = [stamp.replace(tzinfo=None) for stamp in naive.index.to_pydatetime()]
        exp = naive.resample("W").tz_localize(local_tz)
        assert_series_equal(result, exp)

        # it works
        result = ts_local.resample("D")

        # #2245
        sydney = "Australia/Sydney"
        minute_index = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz=sydney)
        s = Series([1, 2], index=minute_index)

        result = s.resample("D", closed="right", label="right")
        day_label = date_range("2001-09-21", periods=1, freq="D", tz=sydney)
        assert_series_equal(result, Series([1.5], index=day_label))

        # for good measure
        result = s.resample("D", kind="period")
        day_period = period_range("2001-09-20", periods=1, freq="D")
        assert_series_equal(result, Series([1.5], index=day_period))

        # GH 6397
        # comparing an offset that doesn't propagate tz's
        hours = date_range("1/1/2011", periods=20000, freq="H")
        hours = hours.tz_localize("EST")
        frame = DataFrame(index=hours)
        frame["first"] = np.random.randn(len(hours))
        frame["second"] = np.cumsum(np.random.randn(len(hours)))

        wanted = ["first", "second"]
        # Build the expectation one column at a time ...
        annual_sum = frame.resample("A", how=np.sum)["first"]
        annual_mean = frame.resample("A", how=np.mean)["second"]
        expected = DataFrame({"first": annual_sum, "second": annual_mean}, columns=wanted)
        # ... and compare with a single dict-driven resample.
        aggregated = frame.resample("A", how={"first": np.sum, "second": np.mean})
        result = aggregated.reindex(columns=wanted)
        assert_frame_equal(result, expected)
    def test_quarterly_resampling(self):
        """Resampling quarterly periods to 'A' matches the timestamp round trip."""
        quarters = period_range('2000Q1', periods=10, freq='Q-DEC')
        ts = Series(np.arange(10), index=quarters)

        result = ts.resample('A')
        # Same resample done on timestamps, then converted back to periods.
        via_timestamps = ts.to_timestamp().resample('A')
        exp = via_timestamps.to_period()
        assert_series_equal(result, exp)
Example #25
0
 def test_resample_ambiguous_time_bin_edge(self):
     # GH 10117
     idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
                         freq="30T", tz="Europe/London")
     expected = Series(np.zeros(len(idx)), index=idx)
     result = expected.resample('30T').mean()
     tm.assert_series_equal(result, expected)
    def test_upsample_with_limit(self):
        """A limited forward-fill upsample must match reindex with the same limit."""
        five_minutely = date_range('1/1/2000', periods=3, freq='5t')
        ts = Series(np.random.randn(len(five_minutely)), five_minutely)

        fill = dict(method='ffill', limit=2)
        result = ts.resample('t', fill_method='ffill', limit=2)
        expected = ts.reindex(result.index, **fill)
        assert_series_equal(result, expected)
Example #27
0
    def test_closed_left_corner(self):
        """#1465: a leading NaN must not change a left-closed 10-minute mean.

        Checked for both label sides; for ``label="left"`` the resulting
        index is also compared with the expected three 10-minute labels.
        """
        # #1465
        s = Series(np.random.randn(21), index=date_range(start="1/1/2012 9:30", freq="1min", periods=21))
        s[0] = np.nan

        result = s.resample("10min", how="mean", closed="left", label="right")
        exp = s[1:].resample("10min", how="mean", closed="left", label="right")
        assert_series_equal(result, exp)

        result = s.resample("10min", how="mean", closed="left", label="left")
        exp = s[1:].resample("10min", how="mean", closed="left", label="left")

        ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)

        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(result.index.equals(ex_index))
        assert_series_equal(result, exp)
Example #28
0
    def test_upsample_with_limit(self):
        """A limited forward-fill period upsample must match asfreq + reindex."""
        years = period_range("1/1/2000", periods=5, freq="A")
        ts = Series(np.random.randn(len(years)), years)

        result = ts.resample("M", fill_method="ffill", limit=2, convention="end")
        monthly = ts.asfreq("M")
        expected = monthly.reindex(result.index, method="ffill", limit=2)
        assert_series_equal(result, expected)
    def test_all_values_single_bin(self):
        """2070: twelve monthly periods resampled to "A" average to the series mean."""
        # 2070
        months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
        s = Series(np.random.randn(len(months)), index=months)

        annual = s.resample("A", how='mean')
        first_bin = annual[0]
        tm.assert_almost_equal(first_bin, s.mean())
Example #30
0
    def test_annual_upsample(self):
        """Upsample annual period series for every target, convention and fill.

        Each result is compared with the same upsample done through
        timestamps. A DataFrame column and a plain 'A-DEC' series are
        checked afterwards.
        """
        targets = ['D', 'B', 'M']
        conventions = ['start', 'end']
        fills = ['ffill', 'bfill']

        for month in MONTHS:
            ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

            for targ, conv, meth in product(targets, conventions, fills):
                result = ts.resample(targ, fill_method=meth,
                                     convention=conv)
                # Rebuild the expectation via timestamps and asfreq.
                stamped = result.to_timestamp(targ, how=conv)
                expected = stamped.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # ``ts`` is the series from the last loop iteration.
        df = DataFrame({'a' : ts})
        frame_result = df.resample('D', fill_method='ffill')
        series_result = df['a'].resample('D', fill_method='ffill')
        assert_series_equal(frame_result['a'], series_result)

        years = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample('M', fill_method='ffill')
        months = period_range('2000-01', '2003-12', freq='M')

        at_start = ts.asfreq('M', how='start')
        expected = at_start.reindex(months, method='ffill')
        assert_series_equal(result, expected)
def test_resample_size():
    """Cross-check ``resample('7T').size()`` against a NumPy bincount."""
    n = 10000
    minutes = date_range('2015-09-19', periods=n, freq='T')
    # Timestamps are drawn at random from the range, so the index is unordered.
    ts = Series(np.random.randn(n), index=np.random.choice(minutes, n))

    left = ts.resample('7T').size()
    edges = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')

    # Locate every timestamp among the bin edges, count per slot, and drop
    # slot 0 (positions before the first edge).
    slots = np.searchsorted(edges.values, ts.index.values, side='right')
    counts = np.bincount(slots, minlength=len(edges) + 1)[1:]
    counts = counts.astype('int64', copy=False)

    right = Series(counts, index=edges)
    assert_series_equal(left, right)
def test_resample_upsample():
    """Upsample ten daily points to minutes by forward filling.

    Checks the number of rows, that the first and last values are carried
    over unchanged, and that the index name survives the resample.
    """
    # from daily
    dti = date_range(start=datetime(2005, 1, 1),
                     end=datetime(2005, 1, 10), freq='D', name='index')

    s = Series(np.random.rand(len(dti)), dti)

    # to minutely, by padding
    # ffill() performs the same forward fill as the pad() alias, which
    # newer pandas releases no longer provide.
    result = s.resample('Min').ffill()
    assert len(result) == 12961
    # .iloc makes the positional lookups explicit instead of relying on the
    # integer-key fallback of a datetime-indexed Series.
    assert result.iloc[0] == s.iloc[0]
    assert result.iloc[-1] == s.iloc[-1]

    assert result.index.name == 'index'
def test_resample_how_method():
    """GH9915: the mean of a "10S" resample leaves empty bins as NaN.

    Two observations about a minute apart must produce seven 10-second
    bins, with values only in the first and last.
    """
    # GH9915
    s = Series([11, 22],
               index=[Timestamp('2015-03-31 21:48:52.672000'),
                      Timestamp('2015-03-31 21:49:52.739000')])
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias is not
    # available on NumPy 2.x.
    expected = Series([11, np.nan, np.nan, np.nan, np.nan, np.nan, 22],
                      index=[Timestamp('2015-03-31 21:48:50'),
                             Timestamp('2015-03-31 21:49:00'),
                             Timestamp('2015-03-31 21:49:10'),
                             Timestamp('2015-03-31 21:49:20'),
                             Timestamp('2015-03-31 21:49:30'),
                             Timestamp('2015-03-31 21:49:40'),
                             Timestamp('2015-03-31 21:49:50')])
    assert_series_equal(s.resample("10S").mean(), expected)
def test_corner_cases(simple_period_range_series,
                      simple_date_range_series):
    """Miscellaneous resample coverage.

    Checks right-closed, left-labelled bins, an empty period series, and
    resampling timestamps to periods.
    """
    # miscellaneous test coverage

    minutes = date_range('1/1/2000', periods=12, freq='t')
    ts = Series(np.random.randn(len(minutes)), index=minutes)

    five_minute_mean = ts.resample('5t', closed='right', label='left').mean()
    ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t')
    tm.assert_index_equal(five_minute_mean.index, ex_index)

    monthly = simple_period_range_series('2007-01', '2010-05', freq='M')
    len0pts = monthly[:0]
    # it works
    empty_result = len0pts.resample('A-DEC').mean()
    assert len(empty_result) == 0

    # resample to periods
    hourly = simple_date_range_series(
        '2000-04-28', '2000-04-30 11:00', freq='h')
    as_periods = hourly.resample('M', kind='period').mean()
    assert len(as_periods) == 1
    assert as_periods.index[0] == Period('2000-04', freq='M')
Example #35
0
def test_resample_nunique_with_date_gap():
    """GH 13453: ``nunique`` on a resample must cope with a gap in the dates.

    Every value is unique, so ``count`` and all spellings of ``nunique``
    must return the same series.
    """
    # GH 13453
    before_gap = pd.date_range("1-1-2000", "2-15-2000", freq="h")
    after_gap = pd.date_range("4-15-2000", "5-15-2000", freq="h")
    gapped = before_gap.append(after_gap)
    s = Series(range(len(gapped)), index=gapped, dtype="int64")
    r = s.resample("M")

    # Since all elements are unique, these should all be the same
    baseline = r.count()
    variants = [r.nunique(), r.agg(Series.nunique), r.agg("nunique")]

    for variant in variants:
        tm.assert_series_equal(baseline, variant)
Example #36
0
def test_resample_loffset(loffset):
    """GH 7687: ``loffset`` shifts the labels of a resampled series.

    ``loffset`` is supplied by the caller; the expected labels are shifted
    by one minute. A second check shifts weekly labels by a negative
    business-day offset.
    """
    # GH 7687
    minutes = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=minutes)

    resampler = s.resample("5min", closed="right", label="right", loffset=loffset)
    result = resampler.mean()

    shifted_labels = date_range("1/1/2000", periods=4, freq="5min") + timedelta(minutes=1)
    bin_means = [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()]
    expected = Series(bin_means, index=shifted_labels)
    tm.assert_series_equal(result, expected)
    assert result.index.freq == Minute(5)

    # from daily
    days = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
    ser = Series(np.random.rand(len(days)), days)

    # to weekly
    unshifted = ser.resample("w-sun").last()
    business_day_offset = BDay()
    shifted = ser.resample("w-sun", loffset=-business_day_offset).last()
    assert unshifted.index[0] - business_day_offset == shifted.index[0]
Example #37
0
def test_nanosecond_resample_error():
    """GH 12307: resampling with a ``Nano`` offset must keep every value
    inside a bin (values used to fall after the last bin).
    """
    # GH 12307 - Values falls after last bin when
    # Resampling using pd.tseries.offsets.Nano as period
    start = 1443707890427
    exp_start = 1443707890400

    nano_index = pd.date_range(start=pd.to_datetime(start), periods=10, freq="100n")
    ts = Series(range(len(nano_index)), index=nano_index)
    result = ts.resample(pd.tseries.offsets.Nano(100)).agg("mean")

    # Same values, with labels starting at ``exp_start``.
    exp_indx = pd.date_range(start=pd.to_datetime(exp_start), periods=10, freq="100n")
    exp = Series(range(len(exp_indx)), index=exp_indx)

    tm.assert_series_equal(result, exp)
Example #38
0
def test_api_compat_before_use(attr):
    """Access ``attr`` on a resampler both before and after it has been used.

    Makes sure the binner is set up for the attribute in either case.
    """
    # make sure that we are setting the binner
    # on these attributes
    seconds = date_range("1/1/2012", periods=100, freq="S")
    ts = Series(np.arange(len(seconds)), index=seconds)
    resampler = ts.resample("30s")

    # before use
    getattr(resampler, attr)

    # after grouper is initialized is ok
    resampler.mean()
    getattr(resampler, attr)
Example #39
0
    def test_upsampling_ohlc(self, freq, period_mult, kind):
        """GH 13083: ``ohlc`` on an upsampled period series.

        The expectation is built through timestamps and then reindexed to
        ``period_mult`` sub-periods per original period.
        """
        # GH 13083
        daily = PeriodIndex(start='2000', freq='D', periods=10)
        s = Series(range(len(daily)), index=daily)
        via_timestamps = s.to_timestamp().resample(freq).ohlc()
        expected = via_timestamps.to_period(freq)

        # timestamp-based resampling doesn't include all sub-periods
        # of the last original period, so extend accordingly:
        full_index = PeriodIndex(start='2000',
                                 freq=freq,
                                 periods=period_mult * len(daily))
        expected = expected.reindex(full_index)

        result = s.resample(freq, kind=kind).ohlc()
        assert_frame_equal(result, expected)
Example #40
0
 def test_resample_with_pytz(self):
     """GH 13238: daily means of tz-aware hourly data keep the pytz zone."""
     # GH 13238
     hourly = pd.date_range('2017-01-01',
                            periods=48,
                            freq="H",
                            tz="US/Eastern")
     s = Series(2, index=hourly)
     result = s.resample("D").mean()

     day_labels = pd.DatetimeIndex(['2017-01-01', '2017-01-02'],
                                   tz="US/Eastern")
     expected = Series(2, index=day_labels)
     assert_series_equal(result, expected)

     # Especially assert that the timezone is LMT for pytz
     expected_tz = pytz.timezone('US/Eastern')
     assert result.index.tz == expected_tz
Example #41
0
    def test_metadata_propagation_indiv(self):
        """Check that Series metadata matches after transpose and resample."""
        # check that the metadata matches up on the resulting ops

        o = Series(range(3), range(3))
        o.name = 'foo'
        o2 = Series(range(3), range(3))
        o2.name = 'bar'

        transposed = o.T
        self.check_metadata(o, transposed)

        # resample
        seconds = date_range('20130101', periods=1000, freq='s')
        ts = Series(np.random.rand(1000), index=seconds, name='foo')

        # default reduction
        by_default = ts.resample('1T')
        self.check_metadata(ts, by_default)

        # reducer given by name
        by_name = ts.resample('1T', how='min')
        self.check_metadata(ts, by_name)

        # reducer given as a callable
        by_callable = ts.resample('1T', how=lambda x: x.sum())
        self.check_metadata(ts, by_callable)
Example #42
0
    def test_resample_tz_localized(self):
        """Resampling tz-aware data must match resampling the naive local data.

        Also covers #2245 (daily bins in Australia/Sydney).
        """
        days = date_range(start='2012-4-13', end='2012-5-1')
        ts = Series(lrange(len(days)), days)

        local_tz = 'America/Los_Angeles'
        ts_utc = ts.tz_localize('UTC')
        ts_local = ts_utc.tz_convert(local_tz)

        result = ts_local.resample('W')

        # Drop the tzinfo, resample the naive series, then re-attach the zone.
        naive = ts_local.copy()
        naive.index = [stamp.replace(tzinfo=None)
                       for stamp in naive.index.to_pydatetime()]
        exp = naive.resample('W').tz_localize(local_tz)
        assert_series_equal(result, exp)

        # it works
        result = ts_local.resample('D')

        # #2245
        sydney = 'Australia/Sydney'
        minute_index = date_range('2001-09-20 15:59', '2001-09-20 16:00',
                                  freq='T', tz=sydney)
        s = Series([1, 2], index=minute_index)

        result = s.resample('D', closed='right', label='right')
        day_label = date_range('2001-09-21', periods=1, freq='D', tz=sydney)
        assert_series_equal(result, Series([1.5], index=day_label))

        # for good measure
        result = s.resample('D', kind='period')
        day_period = period_range('2001-09-20', periods=1, freq='D')
        assert_series_equal(result, Series([1.5], index=day_period))
Example #43
0
class TestTimeGrouper(unittest.TestCase):
    """Compare annual time-based grouping with grouping on ``x.year``."""

    def setUp(self):
        # 1000 random values indexed by a date_range starting 1/1/2000
        self.ts = Series(np.random.randn(1000),
                         index=date_range('1/1/2000', periods=1000))

    def test_apply(self):
        """``apply`` through a TimeGrouper matches ``apply`` grouped by year."""
        grouper = TimeGrouper('A', label='right', closed='right')

        grouped = self.ts.groupby(grouper)

        def f(x):
            # last three entries of the group after ``order()``
            return x.order()[-3:]

        applied = grouped.apply(f)
        expected = self.ts.groupby(lambda x: x.year).apply(f)

        # The outer level holds the group key, which differs between the two
        # groupings; drop it so only the original timestamps are compared.
        applied.index = applied.index.droplevel(0)
        expected.index = expected.index.droplevel(0)
        assert_series_equal(applied, expected)

    def test_count(self):
        """Annual ``count`` resampling matches a per-year ``count``."""
        # Blank out every third value so the counts differ from the sizes.
        self.ts[::3] = np.nan

        result = self.ts.resample('A', how='count')

        expected = self.ts.groupby(lambda x: x.year).count()
        expected.index = result.index

        assert_series_equal(result, expected)

    def test_numpy_reduction(self):
        """Annual ``prod`` resampling matches ``np.prod`` aggregated per year."""
        result = self.ts.resample('A', how='prod', closed='right')

        expected = self.ts.groupby(lambda x: x.year).agg(np.prod)
        expected.index = result.index

        assert_series_equal(result, expected)
Example #44
0
    def test_resample_upsample(self):
        """Upsample a daily series to minutely frequency by padding."""
        # from daily
        dti = DatetimeIndex(
            start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
            freq='D', name='index')

        s = Series(np.random.rand(len(dti)), dti)

        # to minutely, by padding
        result = s.resample('Min', fill_method='pad')
        # 9 days * 1440 minutes, plus the closing timestamp
        # (assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists on Python 3.12+)
        self.assertEqual(len(result), 12961)
        self.assertEqual(result[0], s[0])
        self.assertEqual(result[-1], s[-1])

        # the index name must survive the resampling
        self.assertEqual(result.index.name, 'index')
def test_api_compat_before_use():
    """Group attributes of a resampler are readable before and after use."""
    # Lowercase 's' is accepted by every pandas version, whereas the
    # uppercase 'S' alias is deprecated in recent releases.
    rng = pd.date_range('1/1/2012', periods=100, freq='s')
    ts = Series(np.arange(len(rng)), index=rng)

    # make sure that we are setting the binner
    # on these attributes
    for attr in ['groups', 'ngroups', 'indices']:
        # a fresh resampler per attribute, so nothing has been computed yet
        rs = ts.resample('30s')

        # before use
        getattr(rs, attr)

        # after grouper is initialized is ok
        rs.mean()
        getattr(rs, attr)
Example #46
0
    def test_annual_upsample(self, simple_period_range_series):
        """Forward-filled upsampling of annual period data."""
        annual = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
        frame = DataFrame({"a": annual})

        # Resampling the frame and then selecting the column must equal
        # selecting the column first and resampling it on its own.
        via_frame = frame.resample("D").ffill()["a"]
        via_column = frame["a"].resample("D").ffill()
        tm.assert_series_equal(via_frame, via_column)

        years = period_range("2000", "2003", freq="A-DEC")
        yearly = Series([1, 2, 3, 4], index=years)
        monthly = yearly.resample("M").ffill()

        # Expected: convert to month-start periods, then pad across all months.
        months = period_range("2000-01", "2003-12", freq="M")
        padded = yearly.asfreq("M", how="start").reindex(months, method="ffill")
        tm.assert_series_equal(monthly, padded)
Example #47
0
    def test_resample_anchored_ticks(self):
        """Resampling must not depend on where the first valid sample sits."""
        # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
        # "anchor" the origin at midnight so we get regular intervals rather
        # than starting from the first timestamp which might start in the middle
        # of a desired interval

        stamps = date_range('1/1/2000 04:00:00', periods=86400, freq='s')
        full = Series(np.random.randn(len(stamps)), index=stamps)
        full[:2] = np.nan  # so results are the same
        trimmed = full[2:]

        binning = dict(closed='left', label='left')
        for rule in ('t', '5t', '15t', '30t', '4h', '12h'):
            from_trimmed = trimmed.resample(rule, **binning)
            from_full = full.resample(rule, **binning)
            assert_series_equal(from_trimmed, from_full)
def test_resample_loffset_upsample():
    """GH 20744: ``loffset`` shifts the labels of a forward-filled resample."""
    shift = timedelta(minutes=1)
    minutes = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=minutes)

    # Passing ``loffset`` is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        result = s.resample(
            "5min", closed="right", label="right", loffset=shift
        ).ffill()

    # Each 5-minute label is moved forward by the same one-minute offset.
    labels = date_range("1/1/2000", periods=4, freq="5min") + shift
    expected = Series([s[0], s[5], s[10], s[-1]], index=labels)

    tm.assert_series_equal(result, expected)
Example #49
0
def slide6():
    """Print a short demo of ``resample`` and several ``pd.date_range`` calls.

    Every ``print`` uses the single-argument call form, which is valid on
    both Python 2 and Python 3 (the bare ``print x`` statement is a syntax
    error on Python 3).
    """
    dates = [
        datetime(2011, 1, 2),
        datetime(2011, 1, 5),
        datetime(2011, 1, 7),
        datetime(2011, 1, 8),
        datetime(2011, 1, 10),
        datetime(2011, 1, 12)
    ]
    ts = Series(np.random.randn(6), index=dates)
    print(ts)
    print(ts.resample('D'))

    index = pd.date_range('4/1/2012', '6/1/2012')
    print(index)
    print('start')
    print(pd.date_range(start='4/1/2012', periods=20))
    print('end')
    print(pd.date_range(end='6/1/2012', periods=20))
    print('business end of month')
    print(pd.date_range('1/1/2000', '12/1/2000', freq='BM'))
    print(pd.date_range('5/2/2012 12:56:31', periods=5))
    print('normalize')
    print(pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True))
Example #50
0
def make_qtrly(s: pd.Series, t: str = 'first', name: str = None) -> pd.Series:
    """Resample a date-indexed series to quarterly frequency.

    The index of ``s`` is first replaced, in place, by a ``DatetimeIndex``
    built from its values (so the caller's series is modified). The series
    is then resampled with the ``'1Q'`` rule, whose bins pandas labels with
    the quarter-end date, so no further index adjustment is required.

    Parameters
    ----------
    s:
        Series whose index values can be converted to a ``DatetimeIndex``.
    t:
        Aggregation applied to each quarter: ``'mean'``, ``'first'`` or
        ``'last'``. Any other value skips the resampling step.
    name:
        Label used in the message printed when gaps are filled; falls back
        to ``s.name`` and then to an empty string.

    Returns
    -------
    pd.Series
        The quarterly series as ``float64`` (when resampled), with any
        remaining missing values forward-filled.
    """
    # No dtype is forced here: DatetimeIndex only accepts datetime64 dtypes,
    # and a ``datetime.date`` class is not one of them.
    s.index = pd.DatetimeIndex(s.index.values)
    s.index.freq = s.index.inferred_freq
    name = name or s.name or ''

    if t in ('mean', 'first', 'last'):
        # ``t`` is the name of the Resampler aggregation to call.
        s = getattr(s.resample('1Q'), t)().astype(np.float64)

    if s.isnull().any():
        print(
            f'Series {name} still has some empty data. Filling that in with the last known value.'
        )
        s.ffill(inplace=True)

    # NOTE: an earlier loop tried to move each timestamp to its quarter end
    # with ``Timestamp.replace`` but discarded the results (timestamps are
    # immutable), so it never changed the index and could raise for days
    # that do not exist in the target month. It has been removed.
    return s
Example #51
0
def test_resample_anchored_ticks():
    """Resampled means must not depend on where the first valid sample sits."""
    # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
    # "anchor" the origin at midnight so we get regular intervals rather
    # than starting from the first timestamp which might start in the
    # middle of a desired interval

    rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s")
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts[:2] = np.nan  # so results are the same

    # "min" is the minute alias accepted by every pandas version; the
    # single-letter "t" spelling is deprecated in recent releases.
    freqs = ["min", "5min", "15min", "30min", "4h", "12h"]
    for freq in freqs:
        result = ts[2:].resample(freq, closed="left", label="left").mean()
        expected = ts.resample(freq, closed="left", label="left").mean()
        tm.assert_series_equal(result, expected)
def test_rolling_max_resample():
    """Daily max/median/mean resampling followed by a window-1 rolling max."""
    stamps = [datetime(1975, 1, day) for day in range(1, 6)]
    # So that we can have 3 datapoints on last day (4, 10, and 20)
    stamps += [datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)]

    # Float values (instead of ints), sorted chronologically
    series = Series(list(range(0, 5)) + [10, 20], index=stamps)
    series = series.map(float).sort_index()

    # Only the value of the last day depends on the aggregation:
    # max -> 20.0, median -> 10.0, mean -> (4 + 10 + 20) / 3
    last_day_by_how = [
        ("max", 20.0),
        ("median", 10.0),
        ("mean", (4.0 + 10.0 + 20.0) / 3.0),
    ]
    for how, last_day in last_day_by_how:
        daily_index = DatetimeIndex(
            [datetime(1975, 1, day, 0) for day in range(1, 6)], freq="D"
        )
        expected = Series([0.0, 1.0, 2.0, 3.0, last_day], index=daily_index)
        aggregated = getattr(series.resample("D"), how)()
        tm.assert_series_equal(expected, aggregated.rolling(window=1).max())
Example #53
0
    def test_resample_basic(self):
        """5-minute means for both ``closed`` sides, plus a ``last`` check."""
        minutes = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00',
                             freq='min', name='index')
        s = Series(np.random.randn(14), index=minutes)

        # Right-closed bins labelled on the right: the first bin holds only
        # the very first sample.
        right = s.resample('5min', how='mean', closed='right', label='right')
        right_means = [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()]
        right_labels = date_range('1/1/2000', periods=4, freq='5min')
        assert_series_equal(right, Series(right_means, index=right_labels))
        self.assertEqual(right.index.name, 'index')

        # Left-closed bins, still labelled on the right.
        left = s.resample('5min', how='mean', closed='left', label='right')
        left_means = [s[:5].mean(), s[5:10].mean(), s[10:].mean()]
        left_labels = date_range('1/1/2000 00:05', periods=3, freq='5min')
        assert_series_equal(left, Series(left_means, index=left_labels))

        # ``how='last'`` must equal taking the final element of each group.
        s = self.series
        last = s.resample('5Min', how='last')
        five_minutes = TimeGrouper(Minute(5), closed='left', label='left')
        by_group = s.groupby(five_minutes).agg(lambda x: x[-1])
        assert_series_equal(last, by_group)
def test_resample_float_base():
    """GH25161: resampling with a non-integer ``base`` keeps one bin."""
    first_stamp = "2018-11-26 16:17:43.51"
    stamps = pd.to_datetime(
        [first_stamp, "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"]
    )
    s = Series(np.arange(3), index=stamps)

    # 17 minutes and 43.51 seconds, expressed in (fractional) minutes
    base = 17 + 43.51 / 60

    # Passing ``base`` is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        result = s.resample("3min", base=base).size()

    single_bin = pd.DatetimeIndex([first_stamp], freq="3min")
    tm.assert_series_equal(result, Series(3, index=single_bin))
Example #55
0
def resample(series: pd.Series, freq: str, method: str = 'mean') -> pd.Series:
    """Resamples its input series using `freq` and the aggregation method
    `method` (as described in the pandas documentation).

    `method` must name a callable attribute of the resampler returned by
    `series.resample(freq)`; it is called without arguments.

    Example: `(resample (series "hourly") "D")`

    Raises `ValueError` when `method` is missing on the resampler or names
    an attribute that is not callable.

    """

    resampled = series.resample(freq)

    # Look the method up once and call the very object that was validated,
    # so plain (non-callable) attributes are rejected with a clear error.
    meth = getattr(resampled, method, None)
    if not callable(meth):
        raise ValueError(f'bad resampling method `{method}`')

    return meth()
Example #56
0
    def test_annual_upsample(self):
        """Forward-filled upsampling of annual period data."""
        annual = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')
        frame = DataFrame({'a': annual})

        # Resampling the frame and then selecting the column must equal
        # selecting the column first and resampling it on its own.
        via_frame = frame.resample('D', fill_method='ffill')['a']
        via_column = frame['a'].resample('D', fill_method='ffill')
        assert_series_equal(via_frame, via_column)

        years = period_range('2000', '2003', freq='A-DEC')
        yearly = Series([1, 2, 3, 4], index=years)
        monthly = yearly.resample('M', fill_method='ffill')

        # Expected: convert to month-start periods, then pad across all months.
        months = period_range('2000-01', '2003-12', freq='M')
        padded = yearly.asfreq('M', how='start').reindex(months, method='ffill')
        assert_series_equal(monthly, padded)
Example #57
0
def reaggregate_ipws(ipws, fun=npsum, freq='H', rule='D'):
    """
    Resample IPWs using the function fun, but only sum is supported.
    `freq` corresponds to the actual frequency of the ipws; rule corresponds to
    one of the resampling 'rules' given here:
    http://pandas.pydata.org/pandas-docs/dev/timeseries.html#time-date-components

    `ipws` must be a non-empty, consecutive sequence (checked with
    `_is_consecutive`). The first element supplies the start time and the
    metadata (header dict, file type, bands, non-global bands, geotransform)
    copied onto every output object.

    Returns a list of new IPW objects, one per resampled bin.
    """
    # NOTE(review): `func_name` is the Python 2 attribute name; on Python 3
    # building this message would fail with AttributeError -- confirm the
    # targeted interpreter.
    assert fun is npsum, "Cannot use " + fun.func_name + \
        ", only sum has been implemented"

    assert _is_consecutive(ipws)

    ipw0 = ipws[0]
    start_datetime = ipw0.start_datetime

    # One timestamp per input IPW, spaced `freq` apart from the first start.
    idx = date_range(start=start_datetime, periods=len(ipws), freq=freq)

    # Each element of the series is the data frame of one input IPW.
    series = Series(map(lambda ipw: ipw.data_frame(), ipws), index=idx)

    resampled = series.resample(rule, how=npsum)
    resampled_idx = resampled.index

    # Width of one output bin; indexing [1] requires at least two bins.
    resampled_dt = resampled_idx[1] - resampled_idx[0]

    # One empty IPW per resampled element, filled in below.
    resampled_ipws = [IPW() for el in resampled]

    header_dict = deepcopy(ipw0.header_dict)
    file_type = ipw0.file_type
    # bands = deepcopy(ipw0.bands)
    bands = ipw0.bands
    # nonglobal_bands = deepcopy(ipw0.nonglobal_bands)
    nonglobal_bands = ipw0.nonglobal_bands
    geotransform = ipw0.geotransform
    for ipw_idx, ipw in enumerate(resampled_ipws):

        ipw._data_frame = resampled[ipw_idx]
        ipw.start_datetime = resampled_idx[ipw_idx]
        ipw.end_datetime = resampled_idx[ipw_idx] + resampled_dt
        # Header, bands and non-global bands are deep-copied per output so
        # the results do not share these objects with each other or with ipw0.
        ipw.header_dict = deepcopy(header_dict)
        ipw.file_type = file_type
        ipw.bands = deepcopy(bands)
        ipw.nonglobal_bands = deepcopy(nonglobal_bands)
        ipw.geotransform = geotransform

        # Called last, after data and metadata have been assigned.
        ipw.recalculate_header()

    return resampled_ipws
Example #58
0
 def test_resample_basic(self):
     # GH3609
     s = Series(
         range(100),
         index=date_range("20130101", freq="s", periods=100, name="idx"),
         dtype="float",
     )
     s[10:30] = np.nan
     index = PeriodIndex(
         [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")],
         name="idx",
     )
     expected = Series([34.5, 79.5], index=index)
     result = s.to_period().resample("T", kind="period").mean()
     tm.assert_series_equal(result, expected)
     result2 = s.resample("T", kind="period").mean()
     tm.assert_series_equal(result2, expected)
def test_rolling_max_gh6297():
    """Replicate result expected in GH #6297"""
    days = range(1, 6)
    # One stamp per day, plus a second datapoint on Jan 3 (at 06:00)
    stamps = [datetime(1975, 1, day) for day in days]
    stamps.append(datetime(1975, 1, 3, 6, 0))

    # Float values (instead of ints), sorted chronologically
    series = Series(range(1, 7), index=stamps).map(float).sort_index()

    daily_index = DatetimeIndex(
        [datetime(1975, 1, day, 0) for day in days], freq="D"
    )
    expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=daily_index)

    daily_max = series.resample("D").max()
    tm.assert_series_equal(expected, daily_max.rolling(window=1).max())
Example #60
0
def wind(wind: pd.Series, ts: float):
    """
    Wind is assumed constant throughout the day

    Parameters
    ----------
    wind:
        Daily timeseries of wind
    ts:
        Timestep to disaggregate down to, in minutes (formatted as a
        whole number when building the resampling rule)

    Returns
    -------
    wind:
        A sub-daily timeseries of wind
    """
    # 'min' and .ffill() are the non-deprecated spellings of the 'T' alias
    # and of Resampler.fillna(method='ffill'); the result is unchanged.
    return wind.resample('{:0.0f}min'.format(ts)).ffill()