Example #1
0
def test_raises_on_non_datetimelike_index():
    """Resampling a frame without a datetime-like index raises TypeError."""
    # An empty DataFrame is used as the frame with a non datetimelike index.
    frame = DataFrame()
    expected_error = "".join([
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex,",
        " but got an instance of 'Index'",
    ])
    with pytest.raises(TypeError, match=expected_error):
        resampler = frame.resample('A')
        resampler.mean()
Example #2
0
def test_resample_with_nat():
    """Rows labelled ``NaT`` are left out when resampling a DatetimeIndex.

    GH 13020.  The frame holds five values, two of them under ``NaT``
    labels; the other three sit at seconds 0, 1 and 2 and are averaged into
    1s, 2s, 3s and 60s bins.
    """
    index = DatetimeIndex([pd.NaT,
                           '1970-01-01 00:00:00',
                           pd.NaT,
                           '1970-01-01 00:00:01',
                           '1970-01-01 00:00:02'])
    frame = DataFrame([2, 3, 5, 7, 11], index=index)

    # The expected values are written as floats: on current pandas ``mean``
    # returns float64 for integer input, and ``assert_frame_equal`` compares
    # dtypes, so int64 expectations would be rejected.
    index_1s = DatetimeIndex(['1970-01-01 00:00:00',
                              '1970-01-01 00:00:01',
                              '1970-01-01 00:00:02'])
    frame_1s = DataFrame([3.0, 7.0, 11.0], index=index_1s)
    assert_frame_equal(frame.resample('1s').mean(), frame_1s)

    index_2s = DatetimeIndex(['1970-01-01 00:00:00',
                              '1970-01-01 00:00:02'])
    frame_2s = DataFrame([5.0, 11.0], index=index_2s)
    assert_frame_equal(frame.resample('2s').mean(), frame_2s)

    index_3s = DatetimeIndex(['1970-01-01 00:00:00'])
    frame_3s = DataFrame([7.0], index=index_3s)
    assert_frame_equal(frame.resample('3s').mean(), frame_3s)

    # A 60s bin holds the same three points as the 3s bin.
    assert_frame_equal(frame.resample('60s').mean(), frame_3s)
    def test_evenly_divisible_with_no_extra_bins(self):
        # 4076
        # when the frequency is evenly divisible, sometimes extra bins

        df = DataFrame(np.random.randn(9, 3),
                       index=date_range('2000-1-1', periods=9))
        result = df.resample('5D').mean()
        expected = pd.concat(
            [df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
        expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
        assert_frame_equal(result, expected)

        index = date_range(start='2001-5-4', periods=28)
        df = DataFrame(
            [{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
              'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 +
            [{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
              'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28,
            index=index.append(index)).sort_index()

        index = date_range('2001-5-4', periods=4, freq='7D')
        expected = DataFrame(
            [{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
              'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4,
            index=index)
        result = df.resample('7D').count()
        assert_frame_equal(result, expected)

        expected = DataFrame(
            [{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
              'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4,
            index=index)
        result = df.resample('7D').sum()
        assert_frame_equal(result, expected)
Example #4
0
    def test_resample_weekly_bug_1726(self):
        # 8/6/12 is a Monday
        ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D")
        n = len(ind)
        data = [[x] * 5 for x in range(n)]
        df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)

        # it works!
        df.resample("W-MON", how="first", closed="left", label="left")
Example #5
0
    def test_resample_median_bug_1688(self):
        df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)])

        result = df.resample("T", how=lambda x: x.mean())
        exp = df.asfreq("T")
        tm.assert_frame_equal(result, exp)

        result = df.resample("T", how="median")
        exp = df.asfreq("T")
        tm.assert_frame_equal(result, exp)
Example #6
0
    def test_default_left_closed_label(self):
        others = ["MS", "AS", "QS", "D", "H"]
        others_freq = ["D", "Q", "M", "H", "T"]

        for from_freq, to_freq in zip(others_freq, others):
            idx = DatetimeIndex(start="8/15/2012", periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq)
            assert_frame_equal(resampled, df.resample(to_freq, closed="left", label="left"))
    def test_resample_weekly_bug_1726(self):
        # 8/6/12 is a Monday
        ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D")
        n = len(ind)
        data = [[x] * 5 for x in range(n)]
        df = DataFrame(data, columns=['open', 'high', 'low', 'close', 'vol'],
                       index=ind)

        # it works!
        df.resample('W-MON', how='first', closed='left', label='left')
    def test_resample_unequal_times(self):
        # #1772
        start = datetime(1999, 3, 1, 5)
        # end hour is less than start
        end = datetime(2012, 7, 31, 4)
        bad_ind = date_range(start, end, freq="30min")
        df = DataFrame({'close': 1}, index=bad_ind)

        # it works!
        df.resample('AS', 'sum')
Example #9
0
    def test_default_right_closed_label(self):
        end_freq = ["D", "Q", "M", "D"]
        end_types = ["M", "A", "Q", "W"]

        for from_freq, to_freq in zip(end_freq, end_types):
            idx = DatetimeIndex(start="8/15/2012", periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq)
            assert_frame_equal(resampled, df.resample(to_freq, closed="right", label="right"))
Example #10
0
    def test_default_left_closed_label(self):
        others = ['MS', 'AS', 'QS', 'D', 'H']
        others_freq = ['D', 'Q', 'M', 'H', 'T']

        for from_freq, to_freq in zip(others_freq, others):
            idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq).mean()
            assert_frame_equal(resampled, df.resample(to_freq, closed='left',
                                                      label='left').mean())
Example #11
0
    def test_default_right_closed_label(self):
        end_freq = ['D', 'Q', 'M', 'D']
        end_types = ['M', 'A', 'Q', 'W']

        for from_freq, to_freq in zip(end_freq, end_types):
            idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq).mean()
            assert_frame_equal(resampled, df.resample(to_freq, closed='right',
                                                      label='right').mean())
Example #12
0
 def test_selection(self, index, freq, kind):
     """Resampling through ``on=`` or ``level=`` raises NotImplementedError.

     This is a bug, these should be implemented (GH 14008).
     """
     positions = np.arange(len(index), dtype=np.int64)
     row_index = pd.MultiIndex.from_arrays([positions, index],
                                           names=['v', 'd'])
     df = DataFrame({'date': index, 'a': positions}, index=row_index)
     # First select by column, then by index level.
     for selector in ({'on': 'date'}, {'level': 'd'}):
         with pytest.raises(NotImplementedError):
             df.resample(freq, kind=kind, **selector)
Example #13
0
    def test_resample_median_bug_1688(self):

        for dtype in ["int64", "int32", "float64", "float32"]:
            df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)], dtype=dtype)

            result = df.resample("T", how=lambda x: x.mean())
            exp = df.asfreq("T")
            tm.assert_frame_equal(result, exp)

            result = df.resample("T", how="median")
            exp = df.asfreq("T")
            tm.assert_frame_equal(result, exp)
Example #14
0
 def test_selection(self, index, freq, kind, kwargs):
     """Resampling with the selection in ``kwargs`` raises NotImplementedError.

     This is a bug, these should be implemented (GH 14008).  The error text
     is matched as a regular expression.
     """
     positions = np.arange(len(index), dtype=np.int64)
     row_index = pd.MultiIndex.from_arrays([positions, index],
                                           names=['v', 'd'])
     df = DataFrame({'date': index, 'a': positions}, index=row_index)
     expected_error = "".join((
         "Resampling from level= or on= selection with a PeriodIndex is",
         r" not currently supported, use \.set_index\(\.\.\.\) to",
         " explicitly set index",
     ))
     with pytest.raises(NotImplementedError, match=expected_error):
         df.resample(freq, kind=kind, **kwargs)
    def test_resample_anchored_intraday(self):
        """Anchored resampling of intraday data equals period resampling converted to timestamps.

        Covers monthly and quarterly rules, each with the default and with
        ``closed='left'`` (#1471, #1458), then checks that a few hours
        spanning midnight resample into a single monthly entry.
        """
        # NOTE(review): the calls compare the direct return value of
        # ``resample``, i.e. they rely on the call-style API where that value
        # is already aggregated - confirm against the targeted pandas version.
        cases = [('4/1/2012', 'M'), ('4/1/2013', 'Q')]
        for range_end, rule in cases:
            rng = date_range('1/1/2012', range_end, freq='10min')
            df = DataFrame(rng.month, index=rng)

            for extra in ({}, {'closed': 'left'}):
                result = df.resample(rule, **extra)
                as_period = df.resample(rule, kind='period', **extra)
                expected = as_period.to_timestamp()
                tm.assert_frame_equal(result, expected)

        ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h')
        resampled = ts.resample('M')
        self.assert_(len(resampled) == 1)
Example #16
0
def test_try_aggregate_non_existing_column():
    """Aggregating a resampler over a column that does not exist raises KeyError (GH 16766)."""
    readings = [(0, 1.0, 2.0), (1, 2.0, 2.0), (2, 3.0, 1.5)]
    data = [{'dt': datetime(2017, 6, 1, hour), 'x': x_val, 'y': y_val}
            for hour, x_val, y_val in readings]
    df = DataFrame(data).set_index('dt')

    # 'z' is not a column of the frame, so the aggregation must fail.
    spec = {'x': ['mean'],
            'y': ['median'],
            'z': ['sum']}
    with pytest.raises(KeyError):
        df.resample('30T').agg(spec)
Example #17
0
def test_resample_median_bug_1688():

    for dtype in ['int64', 'int32', 'float64', 'float32']:
        df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0),
                                      datetime(2012, 1, 1, 0, 5, 0)],
                       dtype=dtype)

        result = df.resample("T").apply(lambda x: x.mean())
        exp = df.asfreq('T')
        tm.assert_frame_equal(result, exp)

        result = df.resample("T").median()
        exp = df.asfreq('T')
        tm.assert_frame_equal(result, exp)
def slide14():
    """Print a walkthrough of upsampling with ``resample``.

    First a two-row weekly (``W-WED``) frame is upsampled to daily frequency
    without filling, with forward fill, and with forward fill limited to two
    rows, then re-anchored to ``W-THU``.  Next a monthly period-indexed frame
    is averaged per year and the annual result is upsampled to quarters.

    Everything is written to stdout; nothing is returned.
    """
    states = ['Colorado', 'Texas', 'New York', 'Ohio']
    frame = DataFrame(np.random.randn(2, 4),
                      index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                      columns=states)
    # ``print(...)`` with a single argument is valid on Python 2 and 3 alike.
    print(frame[:5])

    # ``resample`` returns a deferred resampler: ``asfreq`` upsamples without
    # filling, ``ffill`` replaces the removed ``fill_method='ffill'`` keyword.
    df_daily = frame.resample('D').asfreq()
    print('daily fill_method=none')
    print(df_daily)
    print('daily fill_method=ffill')
    print(frame.resample('D').ffill())
    print('daily fill_method=ffill limit=2')
    print(frame.resample('D').ffill(limit=2))

    print(frame.resample('W-THU').ffill())

    print('resampling with periods')
    frame = DataFrame(np.random.randn(24, 4),
                      index=pd.period_range('1-2000', '12-2001', freq='M'),
                      columns=states)
    print(frame[:5])

    # ``how='mean'`` became the ``mean()`` method of the resampler.
    annual_frame = frame.resample('A-DEC').mean()
    print(annual_frame)
    print('resample Quarterly')
    print(annual_frame.resample('Q-DEC').ffill())
    print(annual_frame.resample('Q-DEC', convention='start').ffill())
Example #19
0
    def test_annual_upsample(self):
        """Upsampling annual period series agrees with a timestamp round trip.

        For every anchor month in ``MONTHS``, the series is upsampled to each
        target with both conventions and both fill methods, and compared with
        converting to timestamps, ``asfreq`` with the same fill, and back to
        periods.  A frame column and a small hand-built series are checked
        afterwards.
        """
        targets = ['D', 'B', 'M']

        for month in MONTHS:
            ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

            for targ, conv, meth in product(targets, ['start', 'end'],
                                            ['ffill', 'bfill']):
                # ``fill_method=`` was removed; ``meth`` names the fill method
                # to call on the resampler.
                resampler = ts.resample(targ, convention=conv)
                result = getattr(resampler, meth)()
                expected = result.to_timestamp(targ, how=conv)
                expected = expected.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # ``ts`` is the series built for the last month of the loop above.
        df = DataFrame({'a': ts})
        rdf = df.resample('D').ffill()
        exp = df['a'].resample('D').ffill()
        assert_series_equal(rdf['a'], exp)

        rng = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=rng)

        result = ts.resample('M').ffill()
        ex_index = period_range('2000-01', '2003-12', freq='M')

        expected = ts.asfreq('M', how='start').reindex(ex_index,
                                                       method='ffill')
        assert_series_equal(result, expected)
Example #20
0
    def get_date_trend(self, mode_date=2):
        """Count value occurrences per date bucket and shape them for a trend chart.

        Reads ``self.oriDate`` (one date per record) and ``self.oriValues``
        (the value observed in that record), counts how often each distinct
        value occurs per bucket, and fills buckets without records with 0.

        :param mode_date: date mode, i.e. the bucket size records are merged
            into. 0-day, 1-week, 2-month, 3-Quarter. (default 2)
        :return: dict with ``axisLabels`` (bucket dates formatted
            ``%Y-%m-%d``), ``legendLabels`` (one label per distinct value) and
            ``seriesData`` (one ``{'name', 'data'}`` dict per label, ``data``
            being a list of floats aligned with ``axisLabels``).
        :raises KeyError: if ``mode_date`` is not one of 0, 1, 2, 3.
        """
        axisLabels = self.oriDate[:]
        pointVals = [{copy.deepcopy(oriValue): 1} for oriValue in self.oriValues]

        rule_mode = {'0': 'D', '1': 'W', '2': 'M', '3': 'Q'}
        rule = rule_mode[str(mode_date)]

        # Nothing to aggregate: an empty frame has no datetime index to
        # resample, so answer with an empty chart instead of failing.
        if not pointVals:
            return {'axisLabels': [], 'legendLabels': [], 'seriesData': []}

        df = DataFrame(pointVals, index=axisLabels)
        # ``how='sum'`` was removed; the aggregation is a resampler method.
        df = df.resample(rule).sum()
        df = df.fillna(0)

        # Disabled in the original: total over all items.
        # cols_name = []
        # for name, col in df.iteritems():
        #     cols_name.append(name)
        # df['SUM'] = 0
        # for i in xrange(len(cols_name)):
        #     df['SUM'] += df[cols_name[i]]

        # Disabled in the original: dormitory share, computed only when a
        # 'dorm' column exists and set to 0 otherwise.
        # df['PER_DORM'] = df['dorm']/df['SUM'] if 'dorm' in df else 0

        # Take the index dates out of the frame as a list of formatted labels.
        # Real lists (not lazy ``map`` objects) keep the result serialisable.
        axisLabels = [stamp.strftime('%Y-%m-%d') for stamp in df.index.tolist()]
        seriesData = []
        legendLabels = []
        # ``items`` replaces ``iteritems``, which newer pandas no longer has.
        for colName, col in df.items():
            legendLabels.append(colName)
            data = [0.0 if isnan(x) else float(x) for x in col.tolist()]
            seriesData.append({'name': colName, 'data': data})

        json_dateTrend = {'axisLabels': axisLabels, 'legendLabels': legendLabels, 'seriesData': seriesData}
        return json_dateTrend
Example #21
0
def test_resample_across_dst():
    """Hourly resampling keeps timestamps on both sides of a DST change apart.

    Issue: 14682.  Two samples whose local wall-clock time is identical
    (02:23) but whose UTC offsets differ must land in two separate hourly
    bins.
    """
    # The DatetimeIndex we will start with
    # (note that DST happens at 03:00+02:00 -> 02:00+01:00)
    # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00
    df1 = DataFrame([1477786980, 1477790580], columns=['ts'])
    dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s')
                         .dt.tz_localize('UTC')
                         .dt.tz_convert('Europe/Madrid'))

    # The expected DatetimeIndex after resampling.
    # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00
    # It is built with an hourly frequency because the resampled index
    # carries one and current ``assert_frame_equal`` compares frequencies.
    df2 = DataFrame([1477785600, 1477789200], columns=['ts'])
    dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s')
                         .dt.tz_localize('UTC')
                         .dt.tz_convert('Europe/Madrid'),
                         freq=pd.offsets.Hour())
    df = DataFrame([5, 5], index=dti1)

    result = df.resample(rule='H').sum()
    expected = DataFrame([5, 5], index=dti2)

    assert_frame_equal(result, expected)
Example #22
0
    def test_subset(self):
        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
                       index=rng)
        df.loc[4:8, 'A'] = np.nan
        dates = date_range('1/1/1990', periods=N * 3,
                           freq='25s')

        # with a subset of A should be the same
        result = df.asof(dates, subset='A')
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # same with A/B
        result = df.asof(dates, subset=['A', 'B'])
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # B gives self.df.asof
        result = df.asof(dates, subset='B')
        expected = df.resample('25s', closed='right').ffill().reindex(dates)
        expected.iloc[20:] = 9

        tm.assert_frame_equal(result, expected)
Example #23
0
    def test_resample_axis1(self):
        rng = date_range("1/1/2000", "2/29/2000")
        df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"])

        result = df.resample("M", axis=1)
        expected = df.T.resample("M").T
        tm.assert_frame_equal(result, expected)
Example #24
0
    def test_annual_upsample(self):
        """Upsampling annual period data matches the equivalent timestamp round trip.

        Every anchor month in ``MONTHS`` is combined with each target
        frequency, convention and fill method.  A frame column and a small
        hand-built series are verified afterwards.
        """
        targets = ["D", "B", "M"]

        for month in MONTHS:
            ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-%s" % month)

            for targ, conv, meth in product(targets, ["start", "end"], ["ffill", "bfill"]):
                # The removed ``fill_method=`` keyword is replaced by calling
                # the fill method named by ``meth`` on the resampler.
                result = getattr(ts.resample(targ, convention=conv), meth)()
                expected = result.to_timestamp(targ, how=conv)
                expected = expected.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # ``ts`` is the series built for the last month of the loop above.
        df = DataFrame({"a": ts})
        rdf = df.resample("D").ffill()
        exp = df["a"].resample("D").ffill()
        assert_series_equal(rdf["a"], exp)

        rng = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=rng)

        result = ts.resample("M").ffill()
        ex_index = period_range("2000-01", "2003-12", freq="M")

        expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill")
        assert_series_equal(result, expected)
Example #25
0
 def test_resample_with_only_nat(self):
     # GH 13224
     pi = PeriodIndex([pd.NaT] * 3, freq='S')
     frame = DataFrame([2, 3, 5], index=pi)
     expected_index = PeriodIndex(data=[], freq=pi.freq)
     expected = DataFrame([], index=expected_index)
     result = frame.resample('1s').mean()
     assert_frame_equal(result, expected)
    def test_resample_axis1(self):
        rng = date_range('1/1/2000', '2/29/2000')
        df = DataFrame(np.random.randn(3, len(rng)), columns=rng,
                       index=['a', 'b', 'c'])

        result = df.resample('M', axis=1)
        expected = df.T.resample('M').T
        tm.assert_frame_equal(result, expected)
Example #27
0
def test_asfreq_bug():
    """``asfreq`` on a timedelta-indexed frame inserts NaN rows for the missing minutes."""
    sparse = DataFrame(data=[1, 3],
                       index=[timedelta(), timedelta(minutes=3)])
    minute_grid = timedelta_range('0 day', periods=4, freq='1T')
    expected = DataFrame(data=[1, np.nan, np.nan, 3], index=minute_grid)

    result = sparse.resample('1T').asfreq()
    assert_frame_equal(result, expected)
Example #28
0
 def save_to_file(self, fn):
     """Combine power, location-event and consumption columns and write them to CSV.

     Starts from ``self.power_series_apps_table``, adds one ``'<app> locEv'``
     column per channel reported by ``self.loc.metadata.get_channels()`` with
     the per-row count of that channel in ``'Loc Events'``, adds one
     ``'<app> conEv'`` column per column of
     ``self.loc.appliances_consuming_times`` resampled to one minute, sorts
     the columns by name and writes the table to ``fn``.

     :param fn: target passed to ``DataFrame.to_csv``.
     """
     gg = DataFrame(self.power_series_apps_table)
     # Drop each helper column on its own, so that a missing 'diff1' no
     # longer prevents 'diff2' from being removed.
     for helper_col in ('diff1', 'diff2'):
         if helper_col in gg:
             del gg[helper_col]

     gg['Loc Events'] = self.loc.events_apps_1min['Apps']
     apps = self.loc.metadata.get_channels()
     # Initialize one all-zero series per appliance
     sd = {app: Series(0, index=gg.index) for app in apps}

     # Count location events for each appliance
     for index, row in gg.iterrows():
         try:
             # presumably each cell is a list of channel ids - verify against
             # the producer of ``events_apps_1min``
             if len(row['Loc Events']) > 0:
                 for app in apps:
                     n = row['Loc Events'].count(app)
                     sd[app][index] = n
         except Exception:
             # Best effort: rows whose cell cannot be counted (e.g. a missing
             # value without ``len``) keep their zero counts.
             continue

     if self.loc.name == 'REDD':
         # Channels 3/4 and 10/20 are reported as the pairs (3,4) and (10,20),
         # taking the counts of the first channel of each pair.
         sd[(3,4)] = sd[3]
         sd[(10,20)] = sd[10]
         del sd[3]
         del sd[4]
         del sd[10]
         del sd[20]

     # Change column names and append them to the general table
     locevents = DataFrame(sd)
     locevents.columns = [(str(col) + ' locEv') for col in locevents]
     for locEv in locevents:
         gg[locEv] = locevents[locEv]

     # Get power values of each appliance and resample for 1min.
     # ``resample`` returns a deferred resampler; ``mean`` is the aggregation
     # the bare call-style API applied by default.
     act = DataFrame(self.loc.appliances_consuming_times)
     act = act.resample('1Min').mean()

     if self.loc.name == 'REDD':
         del act[3]
         del act[10]
         act.columns = [(3,4), 5,6,7,8,9,11,12,13,14,15,16,17,18,19,(10,20)]
     act.columns = [(str(col) + ' conEv') for col in act]

     for app in act:
         gg[app] = act[app]
     gg.columns = [str(col) for col in gg]
     gg = gg[sorted(gg.columns)]
     gg.to_csv(fn)
Example #29
0
def test_resample_extra_index_point():
    """Business-month-end resampling yields exactly one row per month, no extra point (GH#9756)."""
    business_days = date_range(start='20150101', end='20150331', freq='B')
    counter = Series(range(len(business_days)), index=business_days)
    df = DataFrame({'A': counter}, dtype='int64')

    month_ends = date_range(start='20150101', end='20150331', freq='BM')
    expected = DataFrame({'A': Series([21, 41, 63], index=month_ends)})

    result = df.resample('BM').last()
    assert_frame_equal(result, expected)
Example #30
0
    def test_resample_with_nat(self, periods, values, freq, expected_values):
        # GH 13224
        index = PeriodIndex(periods, freq='S')
        frame = DataFrame(values, index=index)

        expected_index = period_range('1970-01-01 00:00:00',
                                      periods=len(expected_values), freq=freq)
        expected = DataFrame(expected_values, index=expected_index)
        result = frame.resample(freq).mean()
        assert_frame_equal(result, expected)