Beispiel #1
1
def test_resample_with_nat():
    """Rows labelled ``NaT`` must be ignored when resampling (GH 13020).

    The frame holds five rows, two of which carry a ``NaT`` label (values 2
    and 5).  Only the three timestamped rows (3, 7, 11) may contribute to the
    per-bin means.
    """
    index = DatetimeIndex([pd.NaT,
                           '1970-01-01 00:00:00',
                           pd.NaT,
                           '1970-01-01 00:00:01',
                           '1970-01-01 00:00:02'])
    frame = DataFrame([2, 3, 5, 7, 11], index=index)

    # (rule, expected bin labels, expected means).  ``mean`` yields floats,
    # so the expectations are written as floats to satisfy the dtype check.
    cases = [
        ('1s',
         ['1970-01-01 00:00:00', '1970-01-01 00:00:01',
          '1970-01-01 00:00:02'],
         [3.0, 7.0, 11.0]),
        ('2s',
         ['1970-01-01 00:00:00', '1970-01-01 00:00:02'],
         [5.0, 11.0]),
        ('3s', ['1970-01-01 00:00:00'], [7.0]),
        # A bin wider than the whole span collapses to the same single row.
        ('60s', ['1970-01-01 00:00:00'], [7.0]),
    ]
    for rule, labels, means in cases:
        expected = DataFrame(means, index=DatetimeIndex(labels))
        assert_frame_equal(frame.resample(rule).mean(), expected)
Beispiel #2
1
    def test_resample_median_bug_1688(self):
        df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)])

        result = df.resample("T", how=lambda x: x.mean())
        exp = df.asfreq("T")
        tm.assert_frame_equal(result, exp)

        result = df.resample("T", how="median")
        exp = df.asfreq("T")
        tm.assert_frame_equal(result, exp)
Beispiel #3
0
def test_raises_on_non_datetimelike_index():
    """``resample`` must reject a frame whose index is not datetime-like."""
    # An empty frame carries a plain, non datetime-like index.
    plain_frame = DataFrame()
    expected_error = ("Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex,"
                      " but got an instance of 'Index'")
    with pytest.raises(TypeError, match=expected_error):
        plain_frame.resample('A').mean()
    def test_evenly_divisible_with_no_extra_bins(self):
        # 4076
        # when the frequency is evenly divisible, sometimes extra bins

        df = DataFrame(np.random.randn(9, 3),
                       index=date_range('2000-1-1', periods=9))
        result = df.resample('5D').mean()
        expected = pd.concat(
            [df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
        expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
        assert_frame_equal(result, expected)

        index = date_range(start='2001-5-4', periods=28)
        df = DataFrame(
            [{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
              'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 +
            [{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
              'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28,
            index=index.append(index)).sort_index()

        index = date_range('2001-5-4', periods=4, freq='7D')
        expected = DataFrame(
            [{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
              'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4,
            index=index)
        result = df.resample('7D').count()
        assert_frame_equal(result, expected)

        expected = DataFrame(
            [{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
              'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4,
            index=index)
        result = df.resample('7D').sum()
        assert_frame_equal(result, expected)
Beispiel #5
0
    def test_resample_weekly_bug_1726(self):
        # 8/6/12 is a Monday
        ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D")
        n = len(ind)
        data = [[x] * 5 for x in range(n)]
        df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)

        # it works!
        df.resample("W-MON", how="first", closed="left", label="left")
Beispiel #6
0
    def test_default_left_closed_label(self):
        others = ["MS", "AS", "QS", "D", "H"]
        others_freq = ["D", "Q", "M", "H", "T"]

        for from_freq, to_freq in zip(others_freq, others):
            idx = DatetimeIndex(start="8/15/2012", periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq)
            assert_frame_equal(resampled, df.resample(to_freq, closed="left", label="left"))
    def test_resample_unequal_times(self):
        # #1772
        start = datetime(1999, 3, 1, 5)
        # end hour is less than start
        end = datetime(2012, 7, 31, 4)
        bad_ind = date_range(start, end, freq="30min")
        df = DataFrame({'close': 1}, index=bad_ind)

        # it works!
        df.resample('AS', 'sum')
    def test_resample_weekly_bug_1726(self):
        # 8/6/12 is a Monday
        ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D")
        n = len(ind)
        data = [[x] * 5 for x in range(n)]
        df = DataFrame(data, columns=['open', 'high', 'low', 'close', 'vol'],
                       index=ind)

        # it works!
        df.resample('W-MON', how='first', closed='left', label='left')
Beispiel #9
0
    def test_default_right_closed_label(self):
        end_freq = ["D", "Q", "M", "D"]
        end_types = ["M", "A", "Q", "W"]

        for from_freq, to_freq in zip(end_freq, end_types):
            idx = DatetimeIndex(start="8/15/2012", periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq)
            assert_frame_equal(resampled, df.resample(to_freq, closed="right", label="right"))
    def test_default_left_closed_label(self):
        others = ['MS', 'AS', 'QS', 'D', 'H']
        others_freq = ['D', 'Q', 'M', 'H', 'T']

        for from_freq, to_freq in zip(others_freq, others):
            idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq).mean()
            assert_frame_equal(resampled, df.resample(to_freq, closed='left',
                                                      label='left').mean())
 def test_selection(self, index, freq, kind):
     """Resampling via ``on=`` or ``level=`` currently raises (GH 14008).

     This is a bug, these selections should be implemented.
     """
     positions = np.arange(len(index), dtype=np.int64)
     levels = pd.MultiIndex.from_arrays([positions, index],
                                        names=['v', 'd'])
     df = DataFrame({'date': index, 'a': positions}, index=levels)

     # First select by column, then by index level; both must raise.
     for selector in ({'on': 'date'}, {'level': 'd'}):
         with pytest.raises(NotImplementedError):
             df.resample(freq, kind=kind, **selector)
    def test_default_right_closed_label(self):
        end_freq = ['D', 'Q', 'M', 'D']
        end_types = ['M', 'A', 'Q', 'W']

        for from_freq, to_freq in zip(end_freq, end_types):
            idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
            df = DataFrame(np.random.randn(len(idx), 2), idx)

            resampled = df.resample(to_freq).mean()
            assert_frame_equal(resampled, df.resample(to_freq, closed='right',
                                                      label='right').mean())
Beispiel #13
0
 def test_selection(self, index, freq, kind, kwargs):
     """Resampling via ``on=``/``level=`` selection raises (GH 14008).

     This is a bug, these selections should be implemented; until then the
     error message asserted below is expected.
     """
     positions = np.arange(len(index), dtype=np.int64)
     levels = pd.MultiIndex.from_arrays([positions, index],
                                        names=['v', 'd'])
     df = DataFrame({'date': index, 'a': positions}, index=levels)

     msg = ("Resampling from level= or on= selection with a PeriodIndex is"
            r" not currently supported, use \.set_index\(\.\.\.\) to"
            " explicitly set index")
     with pytest.raises(NotImplementedError, match=msg):
         df.resample(freq, kind=kind, **kwargs)
Beispiel #14
0
    def test_resample_median_bug_1688(self):

        for dtype in ["int64", "int32", "float64", "float32"]:
            df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)], dtype=dtype)

            result = df.resample("T", how=lambda x: x.mean())
            exp = df.asfreq("T")
            tm.assert_frame_equal(result, exp)

            result = df.resample("T", how="median")
            exp = df.asfreq("T")
            tm.assert_frame_equal(result, exp)
Beispiel #15
0
def test_resample_median_bug_1688():

    for dtype in ['int64', 'int32', 'float64', 'float32']:
        df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0),
                                      datetime(2012, 1, 1, 0, 5, 0)],
                       dtype=dtype)

        result = df.resample("T").apply(lambda x: x.mean())
        exp = df.asfreq('T')
        tm.assert_frame_equal(result, exp)

        result = df.resample("T").median()
        exp = df.asfreq('T')
        tm.assert_frame_equal(result, exp)
    def test_resample_anchored_intraday(self):
        """Compare timestamp-based and period-based month/quarter resampling.

        10-minute data is resampled to 'M' and 'Q', once directly and once via
        ``kind='period'`` followed by ``to_timestamp()``; both paths must give
        the same frame, with the default and with ``closed='left'``.

        NOTE(review): the calls below rely on ``resample`` returning the
        aggregated data directly (no explicit aggregation method) and on
        ``self.assert_`` -- presumably the legacy pandas/unittest API; confirm
        against the pandas version this snippet targets.
        """
        # #1471, #1458

        rng = date_range('1/1/2012', '4/1/2012', freq='10min')
        df = DataFrame(rng.month, index=rng)

        # monthly, default closed side
        result = df.resample('M')
        expected = df.resample('M', kind='period').to_timestamp()
        tm.assert_frame_equal(result, expected)

        # monthly, left-closed bins
        result = df.resample('M', closed='left')
        expected = df.resample('M', kind='period', closed='left').to_timestamp()
        tm.assert_frame_equal(result, expected)

        # same checks at quarterly resolution on a longer range
        rng = date_range('1/1/2012', '4/1/2013', freq='10min')
        df = DataFrame(rng.month, index=rng)

        result = df.resample('Q')
        expected = df.resample('Q', kind='period').to_timestamp()
        tm.assert_frame_equal(result, expected)

        result = df.resample('Q', closed='left')
        expected = df.resample('Q', kind='period', closed='left').to_timestamp()
        tm.assert_frame_equal(result, expected)

        # hourly series built by the ``_simple_ts`` helper across the night of
        # 29-30 April 2012; a single monthly row is expected
        ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h')
        resampled = ts.resample('M')
        self.assert_(len(resampled) == 1)
Beispiel #17
0
def test_try_aggregate_non_existing_column():
    """Aggregating a column that does not exist must raise (GH 16766)."""
    rows = [
        {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0},
        {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0},
        {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5}
    ]
    frame = DataFrame(rows).set_index('dt')

    # 'x' and 'y' exist; 'z' does not, which is what triggers the error.
    spec = {'x': ['mean'],
            'y': ['median'],
            'z': ['sum']}
    with pytest.raises(KeyError):
        frame.resample('30T').agg(spec)
def slide14():
    """Print a walkthrough of upsampling with ``resample``.

    First a two-row weekly (``W-WED``) frame is upsampled to daily and to
    ``W-THU`` frequency with different fill strategies; then a monthly
    period-indexed frame is reduced to annual means and upsampled again to
    quarters.  Everything is written to stdout; nothing is returned.
    """
    states = ['Colorado', 'Texas', 'New York', 'Ohio']

    frame = DataFrame(np.random.randn(2, 4),
                      index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                      columns=states)
    # ``print(x)`` with a single argument behaves the same on Python 2 and 3.
    print(frame[:5])

    # ``resample`` returns a Resampler; the former ``how=``/``fill_method=``
    # keywords are expressed as methods on it.  ``asfreq`` leaves the newly
    # introduced rows unfilled.
    df_daily = frame.resample('D').asfreq()
    print('daily fill_method=none')
    print(df_daily)
    print('daily fill_method=ffill')
    print(frame.resample('D').ffill())
    print('daily fill_method=ffill limit=2')
    print(frame.resample('D').ffill(limit=2))

    print(frame.resample('W-THU').ffill())

    print('resampling with periods')
    frame = DataFrame(np.random.randn(24, 4),
                      index=pd.period_range('1-2000', '12-2001', freq='M'),
                      columns=states)
    print(frame[:5])

    annual_frame = frame.resample('A-DEC').mean()
    print(annual_frame)
    print('resample Quarterly')
    print(annual_frame.resample('Q-DEC').ffill())
    print(annual_frame.resample('Q-DEC', convention='start').ffill())
Beispiel #19
0
    def test_resample_axis1(self):
        rng = date_range("1/1/2000", "2/29/2000")
        df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"])

        result = df.resample("M", axis=1)
        expected = df.T.resample("M").T
        tm.assert_frame_equal(result, expected)
Beispiel #20
0
def test_resample_across_dst():
    """Hourly resampling keeps both sides of a DST fold apart (issue 14682).

    The test resamples a DatetimeIndex with values before and after a DST
    change (note that DST happens at 03:00+02:00 -> 02:00+01:00), so the same
    wall-clock hour occurs twice with different UTC offsets.
    """
    def madrid_index(epoch_seconds, freq=None):
        # Epoch seconds -> tz-aware DatetimeIndex in Europe/Madrid time.
        stamps = pd.to_datetime(
            DataFrame(epoch_seconds, columns=['ts']).ts, unit='s')
        return DatetimeIndex(stamps.dt.tz_localize('UTC')
                                   .dt.tz_convert('Europe/Madrid'),
                             freq=freq)

    # The DatetimeIndex we will start with:
    # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00
    dti1 = madrid_index([1477786980, 1477790580])

    # The expected DatetimeIndex after resampling:
    # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00
    # It carries the hourly freq, as the resampled index does; frame
    # comparison checks the index freq too.
    dti2 = madrid_index([1477785600, 1477789200], freq='h')

    df = DataFrame([5, 5], index=dti1)

    result = df.resample(rule='h').sum()
    expected = DataFrame([5, 5], index=dti2)

    assert_frame_equal(result, expected)
Beispiel #21
0
    def get_date_trend(self, mode_date):
        """
        Count occurrences of each original value per time bucket.

        Every entry of ``self.oriValues`` becomes a one-hot row stamped with
        the matching entry of ``self.oriDate``; the rows are then summed per
        bucket so that each column holds the count of one distinct value.

        :param mode_date: date mode, i.e. the time unit the data is merged
            into: 0-day, 1-week, 2-month, 3-quarter.  (The original note says
            "default 2", but the signature defines no default.)
        :return: dict with ``axisLabels`` (bucket dates as 'YYYY-MM-DD'),
            ``legendLabels`` (column names) and ``seriesData`` (one
            ``{'name', 'data'}`` dict per column, ``data`` being floats).
        :raises KeyError: if ``mode_date`` is not one of the four modes.
        """
        axisLabels = self.oriDate[:]
        pointVals = [{copy.deepcopy(oriValue): 1} for oriValue in self.oriValues]

        rule_mode = {'0': 'D', '1': 'W', '2': 'M', '3': 'Q'}

        df = DataFrame(pointVals, index=axisLabels)
        # ``how='sum'`` is no longer accepted by ``resample``; aggregate on
        # the returned Resampler instead.
        df = df.resample(rule_mode[str(mode_date)]).sum()
        df = df.fillna(0)

        # Sum over all columns (disabled):
        # cols_name = []
        # for name, col in df.iteritems():
        #     cols_name.append(name)
        # df['SUM'] = 0
        # for i in xrange(len(cols_name)):
        #     df['SUM'] += df[cols_name[i]]

        # Dormitory share (disabled): only computed when a 'dorm' column
        # exists, otherwise set to 0.
        # df['PER_DORM'] = df['dorm']/df['SUM'] if 'dorm' in df else 0

        # Take the index dates out of the frame as a list of labels.  Real
        # lists (not lazy ``map`` objects) keep the result serialisable on
        # Python 3 as well.
        axisLabels = [stamp.strftime('%Y-%m-%d') for stamp in df.index.tolist()]
        seriesData = []
        legendLabels = []
        for colName, col in df.items():
            legendLabels.append(colName)
            data = [0.0 if isnan(x) else float(x) for x in col.tolist()]
            seriesData.append({'name': colName, 'data': data})

        json_dateTrend = {'axisLabels': axisLabels, 'legendLabels': legendLabels, 'seriesData': seriesData}
        return json_dateTrend
Beispiel #22
0
    def test_annual_upsample(self):
        """Upsampling annual periods must match the timestamp round trip.

        For every anchoring month, target frequency, convention and fill
        method, the resampled series is compared with converting to
        timestamps, filling via ``asfreq`` and converting back to periods.
        """
        targets = ["D", "B", "M"]

        for month in MONTHS:
            ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-%s" % month)

            for targ, conv, meth in product(targets, ["start", "end"], ["ffill", "bfill"]):
                # ``fill_method=`` is gone from ``resample``; call the fill
                # method by name on the Resampler instead.
                result = getattr(ts.resample(targ, convention=conv), meth)()
                expected = result.to_timestamp(targ, how=conv)
                expected = expected.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # Frame and column resampling must agree (uses the last ``ts`` built
        # by the loop above).
        df = DataFrame({"a": ts})
        rdf = df.resample("D").ffill()
        exp = df["a"].resample("D").ffill()
        assert_series_equal(rdf["a"], exp)

        rng = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=rng)

        result = ts.resample("M").ffill()
        ex_index = period_range("2000-01", "2003-12", freq="M")

        expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill")
        assert_series_equal(result, expected)
Beispiel #23
0
    def test_subset(self):
        """Exercise the ``subset`` argument of ``DataFrame.asof``.

        Column 'A' receives a run of NaNs while 'B' stays complete; ``asof``
        is then evaluated on a denser (25s) grid with ``subset`` set to 'A',
        to both columns, and to 'B'.
        """
        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
                       index=rng)
        # NOTE(review): integer slice passed to ``.loc`` on a datetime index
        # -- presumably meant positionally; confirm on the targeted pandas.
        df.loc[4:8, 'A'] = np.nan
        dates = date_range('1/1/1990', periods=N * 3,
                           freq='25s')

        # with a subset of A should be the same
        result = df.asof(dates, subset='A')
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # same with A/B
        result = df.asof(dates, subset=['A', 'B'])
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # B gives self.df.asof
        result = df.asof(dates, subset='B')
        # expectation rebuilt independently: forward-filled 25s resample
        # reindexed onto the lookup dates, with rows 20+ overwritten by 9
        expected = df.resample('25s', closed='right').ffill().reindex(dates)
        expected.iloc[20:] = 9

        tm.assert_frame_equal(result, expected)
Beispiel #24
0
    def test_annual_upsample(self):
        """Upsampling annual periods must match the timestamp round trip.

        For every anchoring month, target frequency, convention and fill
        method, the resampled series is compared with converting to
        timestamps, filling via ``asfreq`` and converting back to periods.
        """
        targets = ['D', 'B', 'M']

        for month in MONTHS:
            ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

            for targ, conv, meth in product(targets, ['start', 'end'],
                                            ['ffill', 'bfill']):
                # ``fill_method=`` is gone from ``resample``; call the fill
                # method by name on the Resampler instead.
                result = getattr(ts.resample(targ, convention=conv), meth)()
                expected = result.to_timestamp(targ, how=conv)
                expected = expected.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # Frame and column resampling must agree (uses the last ``ts`` built
        # by the loop above).
        df = DataFrame({'a': ts})
        rdf = df.resample('D').ffill()
        exp = df['a'].resample('D').ffill()
        assert_series_equal(rdf['a'], exp)

        rng = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=rng)

        result = ts.resample('M').ffill()
        ex_index = period_range('2000-01', '2003-12', freq='M')

        expected = ts.asfreq('M', how='start').reindex(ex_index,
                                                       method='ffill')
        assert_series_equal(result, expected)
 def test_resample_with_only_nat(self):
     """A PeriodIndex made only of ``NaT`` resamples to an empty frame (GH 13224)."""
     all_nat = PeriodIndex([pd.NaT] * 3, freq='S')
     frame = DataFrame([2, 3, 5], index=all_nat)

     # Expected: no rows at all, on an empty index of the same frequency.
     empty_periods = PeriodIndex(data=[], freq=all_nat.freq)
     expected = DataFrame([], index=empty_periods)

     assert_frame_equal(frame.resample('1s').mean(), expected)
    def test_resample_axis1(self):
        rng = date_range('1/1/2000', '2/29/2000')
        df = DataFrame(np.random.randn(3, len(rng)), columns=rng,
                       index=['a', 'b', 'c'])

        result = df.resample('M', axis=1)
        expected = df.T.resample('M').T
        tm.assert_frame_equal(result, expected)
Beispiel #27
0
def test_asfreq_bug():
    """``resample(...).asfreq()`` on a timedelta index inserts NaN rows."""
    observed_at = [timedelta(), timedelta(minutes=3)]
    df = DataFrame(data=[1, 3], index=observed_at)

    result = df.resample('1T').asfreq()

    # Minutes 1 and 2 have no observation and therefore stay empty.
    minute_grid = timedelta_range('0 day', periods=4, freq='1T')
    expected = DataFrame(data=[1, np.nan, np.nan, 3], index=minute_grid)
    assert_frame_equal(result, expected)
Beispiel #28
0
 def save_to_file(self, fn):
     """Assemble the per-appliance event table and write it to ``fn`` as CSV.

     Starts from ``self.power_series_apps_table``, adds one ``'<app> locEv'``
     column per channel with the number of times that channel occurs in the
     row's 'Loc Events' entry, adds one ``'<app> conEv'`` column per column of
     the resampled ``self.loc.appliances_consuming_times``, sorts the columns
     by name and saves the result.

     :param fn: destination passed straight to ``DataFrame.to_csv``.
     """
     gg = DataFrame(self.power_series_apps_table)
     # Best effort: drop the helper columns.  If 'diff1' is missing the
     # exception skips the deletion of 'diff2' as well.
     try:
         del gg['diff1']
         del gg['diff2']
     except Exception:
         print('')

     gg['Loc Events'] = self.loc.events_apps_1min['Apps']
     apps = self.loc.metadata.get_channels()
     sd = {}
     # Initialize one all-zero series per channel, aligned with the table
     for app in apps:
         sd[app] = Series(0, index=gg.index)

     # Count location events for each appliance
     for index, row in gg.iterrows():
         try:
             if len(row['Loc Events']) > 0:
                 for app in apps:
                     n = row['Loc Events'].count(app)
                     sd[app][index] = n
         except Exception:
             # rows whose 'Loc Events' entry cannot be measured/counted are
             # skipped (presumably missing values -- confirm)
             continue

     # For the 'REDD' location the counts of channel 3 are stored under the
     # key (3,4) and those of channel 10 under (10,20); the individual
     # channels 3, 4, 10 and 20 are then removed.
     if self.loc.name == 'REDD':
         sd[(3,4)] = sd[3]
         sd[(10,20)] = sd[10]
         del sd[3]
         del sd[4]
         del sd[10]
         del sd[20]

     # Change column names and append them to the general table
     locevents = DataFrame(sd)
     locevents.columns = [(str(col) + ' locEv') for col in locevents]
     for locEv in locevents:
         gg[locEv] = locevents[locEv]


     # Get power values of each appliance and resample for 1min
     # NOTE(review): no aggregation method is applied after ``resample`` --
     # presumably written for a pandas version where this returned a frame;
     # confirm before running on a current release.
     act = DataFrame(self.loc.appliances_consuming_times)
     act = act.resample('1Min')

     if self.loc.name == 'REDD':
         del act[3]
         del act[10]
         act.columns = [(3,4), 5,6,7,8,9,11,12,13,14,15,16,17,18,19,(10,20)]
     act.columns = [(str(col) + ' conEv') for col in act]

     for app in act:
         gg[app] = act[app]
     # Stringify the labels so that they can be sorted together
     gg.columns = [str(col) for col in gg]
     gg = gg[sorted(gg.columns)]
     gg.to_csv(fn)
     return
    def test_resample_with_nat(self, periods, values, freq, expected_values):
        # GH 13224
        index = PeriodIndex(periods, freq='S')
        frame = DataFrame(values, index=index)

        expected_index = period_range('1970-01-01 00:00:00',
                                      periods=len(expected_values), freq=freq)
        expected = DataFrame(expected_values, index=expected_index)
        result = frame.resample(freq).mean()
        assert_frame_equal(result, expected)
Beispiel #30
0
def test_resample_extra_index_point():
    """Business-month-end ``last`` must not add an extra index point (GH#9756)."""
    month_ends = date_range(start='20150101', end='20150331', freq='BM')
    expected = DataFrame({'A': Series([21, 41, 63], index=month_ends)})

    # One running counter value per business day of the quarter.
    business_days = date_range(start='20150101', end='20150331', freq='B')
    counter = Series(range(len(business_days)), index=business_days)
    df = DataFrame({'A': counter}, dtype='int64')

    assert_frame_equal(df.resample('BM').last(), expected)
Beispiel #31
0
    def test_annual_upsample(self, simple_period_range_series):
        ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
        df = DataFrame({"a": ts})
        rdf = df.resample("D").ffill()
        exp = df["a"].resample("D").ffill()
        tm.assert_series_equal(rdf["a"], exp)

        rng = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=rng)

        result = ts.resample("M").ffill()
        ex_index = period_range("2000-01", "2003-12", freq="M")

        expected = ts.asfreq("M", how="start").reindex(ex_index,
                                                       method="ffill")
        tm.assert_series_equal(result, expected)
    def test_annual_upsample(self, simple_period_range_series):
        ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC')
        df = DataFrame({'a': ts})
        rdf = df.resample('D').ffill()
        exp = df['a'].resample('D').ffill()
        assert_series_equal(rdf['a'], exp)

        rng = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=rng)

        result = ts.resample('M').ffill()
        ex_index = period_range('2000-01', '2003-12', freq='M')

        expected = ts.asfreq('M', how='start').reindex(ex_index,
                                                       method='ffill')
        assert_series_equal(result, expected)
Beispiel #33
0
def test_resample_timedelta_values():
    """Timedelta dtype survives the NaT values resampling introduces (GH 13119)."""
    sparse = timedelta_range("1 day", "6 day", freq="4D")
    df = DataFrame({"time": sparse}, index=sparse)

    # On the denser 2-day grid the middle slot has no observation.
    dense = timedelta_range("1 day", "6 day", freq="2D")
    exp = Series(dense, index=dense, name="time")
    exp.iloc[1] = pd.NaT

    # Same expectation whether the column is picked after or before
    # resampling.
    selections = (
        lambda: df.resample("2D").first()["time"],
        lambda: df["time"].resample("2D").first(),
    )
    for select in selections:
        tm.assert_series_equal(select(), exp)
Beispiel #34
0
def test_resample_categorical_data_with_timedeltaindex():
    """Aggregate object and categorical columns on a TimedeltaIndex (GH #12169).

    Each 10-second bin is reduced to its most frequent value.
    """
    seconds = pd.to_timedelta(list(range(20)), unit="s")
    df = DataFrame({"Group_obj": "A"}, index=seconds)
    df["Group"] = df["Group_obj"].astype("category")

    result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))

    bins = pd.TimedeltaIndex([0, 10], unit="s", freq="10s")
    expected = DataFrame(
        {
            "Group_obj": ["A", "A"],
            "Group": ["A", "A"]
        },
        index=bins,
    )
    # Fix the column order, then mirror the object column into 'Group'.
    expected = expected.reindex(["Group_obj", "Group"], axis=1)
    expected["Group"] = expected["Group_obj"]
    tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
    """Timedelta dtype survives the NaT values resampling introduces (GH 13119).

    The range runs to '6 day' so that the 4-day fixture holds two rows
    (day 1 and day 5); on the 2-day grid the slot in between (day 3) has no
    observation and must become ``NaT``.  With an end of '4 day' the fixture
    would contain a single row and no gap could ever be introduced.
    """
    times = timedelta_range('1 day', '6 day', freq='4D')
    df = DataFrame({'time': times}, index=times)

    times2 = timedelta_range('1 day', '6 day', freq='2D')
    exp = Series(times2, index=times2, name='time')
    exp.iloc[1] = pd.NaT

    # Same expectation whether the column is picked after or before
    # resampling.
    res = df.resample('2D').first()['time']
    tm.assert_series_equal(res, exp)
    res = df['time'].resample('2D').first()
    tm.assert_series_equal(res, exp)
Beispiel #36
0
def test_resample_with_timedeltas():
    """30-minute sums on a minute-resolution TimedeltaIndex match a manual groupby."""
    # Reference result: integer-divide the row number into blocks of 30.
    counter = DataFrame({"A": np.arange(1480)})
    expected = counter.groupby(counter.index // 30).sum()
    expected.index = pd.timedelta_range("0 days", freq="30T", periods=50)

    minutes = pd.to_timedelta(np.arange(1480), unit="T")
    df = DataFrame({"A": np.arange(1480)}, index=minutes)

    # Frame-level resampling ...
    tm.assert_frame_equal(df.resample("30T").sum(), expected)

    # ... and the same on the single column.
    column = df["A"]
    tm.assert_series_equal(column.resample("30T").sum(), expected["A"])
def test_resample_datetime_values():
    """Datetime dtype survives the NaT values resampling introduces (GH 13119)."""
    first_day = datetime(2016, 1, 15)
    last_day = datetime(2016, 1, 19)
    dates = [first_day, last_day]
    df = DataFrame({'timestamp': dates}, index=dates)

    # The 2-day grid has a middle slot (Jan 17) without any observation.
    grid = date_range('2016-01-15', periods=3, freq='2D')
    exp = Series([datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)],
                 index=grid, name='timestamp')

    # Same expectation whether the column is picked after or before
    # resampling.
    selections = (
        lambda: df.resample('2D').first()['timestamp'],
        lambda: df['timestamp'].resample('2D').first(),
    )
    for select in selections:
        tm.assert_series_equal(select(), exp)
Beispiel #38
0
def apply_charting_to_df(
    df: pd.DataFrame, chart_period: str, start_time: str, stop_time: str
):
    """Resample price data to ``chart_period`` and clip it to a time window.

    Parameters
    ----------
        df: dataframe with data loaded; when its index is not
            ``datetime64[ns]`` the ``date`` column is converted and becomes
            the index (this modifies ``df`` in place)
        chart_period: string, describes how often to sample data, e.g. '1Min'
            see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
        start_time: datestring in YYYY-MM-DD HH:MM (ex. 2020-08-31 04:00) of
            when to begin the backtest; ``datetime`` or ``int`` values are
            converted to such a string first
        stop_time: datestring of YYYY-MM-DD HH:MM when to stop the backtest;
            converted like ``start_time``
    Returns
    -------
        DataFrame holding the first row of every ``chart_period`` bin,
        restricted to the requested window
    Raises
    ------
        Exception: when no ``date`` column (or index name) is available
    """

    def to_window_bound(moment):
        # datetime / int bounds become formatted strings; everything else
        # (typically an already formatted string) passes through untouched.
        if isinstance(moment, datetime) or type(moment) is int:
            unit = detect_time_unit(moment)
            converted = pd.to_datetime(moment, unit=unit)
            return converted.strftime("%Y-%m-%d %H:%M:%S")
        return moment

    if df.index.dtype != "datetime64[ns]":
        available = df.columns.values.tolist()
        available.extend([df.index.name])
        if "date" not in available:
            raise Exception(
                "Data does not have a date column. Headers must include date, open, high, low, close, volume."
            )

        # The unit is guessed from one sample (the entry labelled 1).
        time_unit = detect_time_unit(df.date[1])
        df.date = pd.to_datetime(df.date, unit=time_unit)
        df.set_index("date", inplace=True)

    if start_time:
        start_time = to_window_bound(start_time)
    if stop_time:
        stop_time = to_window_bound(stop_time)

    df = df.resample(chart_period).first()

    # An absent bound leaves that side of the slice open.
    if start_time or stop_time:
        lower = start_time if start_time else None
        upper = stop_time if stop_time else None
        df = df[lower:upper]  # noqa

    return df
Beispiel #39
0
    def test_annual_upsample(self):
        """Upsampling annual periods must match the timestamp round trip.

        For every anchoring month, target frequency, convention and fill
        method, the resampled series is compared with converting to
        timestamps, filling via ``asfreq`` and converting back to periods.
        """
        targets = ['D', 'B', 'M']

        for month in MONTHS:
            ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

            for targ, conv, meth in product(targets, ['start', 'end'],
                                            ['ffill', 'bfill']):
                # ``fill_method=`` is gone from ``resample``; call the fill
                # method by name on the Resampler instead.
                result = getattr(ts.resample(targ, convention=conv), meth)()
                expected = result.to_timestamp(targ, how=conv)
                expected = expected.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # Frame and column resampling must agree (uses the last ``ts`` built
        # by the loop above).
        df = DataFrame({'a': ts})
        rdf = df.resample('D').ffill()
        exp = df['a'].resample('D').ffill()
        assert_series_equal(rdf['a'], exp)
Beispiel #40
0
def resample_calendar(df: pd.DataFrame, offset: str) -> pd.DataFrame:
    """Resample OHLCV data by a calendar offset.

    Each bin keeps the first ``open``, the highest ``high``, the lowest
    ``low``, the last ``close`` and the summed ``volume``.
    See http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#anchored-offsets for compatible offsets.

    :param df: data with ``open``, ``high``, ``low``, ``close`` and ``volume`` columns
    :param offset: calendar offset
    :return: result DataFrame
    """
    per_column = dict(
        open="first",
        high="max",
        low="min",
        close="last",
        volume="sum",
    )
    bins = df.resample(offset)
    return bins.agg(per_column)
Beispiel #41
0
    def test_asfreq_resample_set_correct_freq(self):
        # GH#5613
        # we test if .asfreq() and .resample() set the correct value for .freq
        df = DataFrame(
            {"date": ["2012-01-01", "2012-01-02", "2012-01-03"], "col": [1, 2, 3]}
        )
        df = df.set_index(to_datetime(df.date))

        # testing the settings before calling .asfreq() and .resample()
        assert df.index.freq is None
        assert df.index.inferred_freq == "D"

        # does .asfreq() set .freq correctly?
        assert df.asfreq("D").index.freq == "D"

        # does .resample() set .freq correctly?
        assert df.resample("D").asfreq().index.freq == "D"
Beispiel #42
0
 def create_fn_list_ror_ts(ror: pd.DataFrame, *, period: str = 'year') -> list:
     """
     Returns a list of functions of weights.

     ``ror`` is split with ``ror.resample(period)``; for every resulting group
     one function is produced.  Called with ``weights`` it spreads a fixed
     initial investment of 1000 over the assets, grows each share by the
     cumulative product of ``1 + ror`` within the group, sums the assets and
     returns the percentage change of that combined wealth index.

     NOTE(review): the default 'year' does not look like a pandas offset
     alias -- presumably callers always pass ``period`` explicitly; confirm.
     """
     # Frame.weights_sum_is_one(weights)
     initial_inv = 1000

     def build(x):
         # ``y=x`` binds this iteration's group as a default argument, so
         # every function keeps its own group.
         def ror_list_fn(weights, y=x):
             returns = y[1]  # the data part of the (label, data) pair
             allocation = np.asarray(weights) * initial_inv  # rebalancing
             per_asset_wealth = allocation * (1 + returns).cumprod()
             combined_wealth = per_asset_wealth.sum(axis=1)
             return combined_wealth.pct_change()
         return ror_list_fn

     return [build(group) for group in ror.resample(period)]
def test_resample_dtype_preservation():
    """Forward-fill upsampling must keep the int32 dtype (GH 12202)."""
    # validation tests for dtype preservation
    weekly = pd.date_range(start='2016-01-01', periods=4, freq='W')
    values = Series([5, 6, 7, 8], dtype='int32')
    df = DataFrame({'date': weekly,
                    'group': [1, 1, 2, 2],
                    'val': values}).set_index('date')

    # plain resample
    upsampled = df.resample('1D').ffill()
    assert upsampled.val.dtype == np.int32

    # resample within groups
    upsampled = df.groupby('group').resample('1D').ffill()
    assert upsampled.val.dtype == np.int32
Beispiel #44
0
    def test_resample_empty_dataframe(self, freq, resample_method):
        """Resampling an empty frame keeps index and freq intact (GH13212)."""
        empty_index = self.create_series().index[:0]
        f = DataFrame(index=empty_index)

        # count retains dimensions too
        resampler = f.resample(freq)
        result = getattr(resampler, resample_method)()

        # GH14962: 'size' is compared against an empty Series, every other
        # method against a copy of the empty frame.
        expected = Series([]) if resample_method == 'size' else f.copy()

        expected.index = f.index._shallow_copy(freq=freq)
        assert_index_equal(result.index, expected.index)
        assert result.index.freq == expected.index.freq
        assert_almost_equal(result, expected, check_dtype=False)
Beispiel #45
0
def test_resample_quantile_timedelta():
    """Quantile of a resampled timedelta column (GH: 29485)."""
    seconds = pd.to_timedelta(np.arange(4), unit="s")
    daily_index = pd.date_range("20200101", periods=4, tz="UTC")
    df = DataFrame({"value": seconds}, index=daily_index)

    result = df.resample("2D").quantile(0.99)

    expected_values = [
        pd.Timedelta("0 days 00:00:00.990000"),
        pd.Timedelta("0 days 00:00:02.990000"),
    ]
    expected_index = pd.date_range("20200101", periods=2, tz="UTC", freq="2D")
    expected = DataFrame({"value": expected_values}, index=expected_index)

    tm.assert_frame_equal(result, expected)
Beispiel #46
0
    def test_asfreq_resample_set_correct_freq(self, frame_or_series):
        # GH#5613
        # we test if .asfreq() and .resample() set the correct value for .freq
        dti = to_datetime(["2012-01-01", "2012-01-02", "2012-01-03"])
        obj = DataFrame({"col": [1, 2, 3]}, index=dti)
        if frame_or_series is Series:
            obj = obj["col"]

        # testing the settings before calling .asfreq() and .resample()
        assert obj.index.freq is None
        assert obj.index.inferred_freq == "D"

        # does .asfreq() set .freq correctly?
        assert obj.asfreq("D").index.freq == "D"

        # does .resample() set .freq correctly?
        assert obj.resample("D").asfreq().index.freq == "D"
Beispiel #47
0
    def test_metadata_propagation_indiv(self):
        """Run ``check_metadata`` on a groupby result and on a resample result."""
        # groupby
        labels_a = ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo']
        labels_b = ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three']
        df = DataFrame({'A': labels_a,
                        'B': labels_b,
                        'C': np.random.randn(8),
                        'D': np.random.randn(8)})
        grouped_sum = df.groupby('A').sum()
        self.check_metadata(df, grouped_sum)

        # resample
        values = np.random.randn(1000, 2)
        second_index = date_range('20130101', periods=1000, freq='s')
        df = DataFrame(values, index=second_index)
        resampled = df.resample('1T')
        self.check_metadata(df, resampled)
Beispiel #48
0
def winter_monthly(df: pd.DataFrame) -> pd.DataFrame:
    """Compute winter monthly deaths as a %age of all winter deaths.

    The frame is restricted to rows whose ``Date`` lies between 1 Jul 2020 and
    30 Jun 2021, summed per calendar month and then expressed as each month's
    percentage of the column total. Two null rows (31 May 2021 and
    30 Jun 2021) are appended for the "UK" column.

    :param df: frame that can be queried by ``Date`` and resampled by date,
        with a "UK" column.
    :return: monthly percentages followed by the two null padding rows.
    :raises AssertionError: if the "UK" total for the period is not 95234.
    """
    df = df.query("Date >= '1 Jul 2020' and Date <= '30 Jun 2021'")
    # A MonthEnd offset object gives the same month-end bins as the "M" alias
    # without depending on the alias string, which newer pandas deprecates.
    df = df.resample(pd.offsets.MonthEnd()).sum()
    assert df["UK"].sum() == 95234  # quality check

    # convert to monthly percentage of total
    df = df.div(df.sum()) * 100

    # data is to mid April 2021: pad remaining months to end of winter period with None
    idx = pd.to_datetime(
        [datetime(2021, 5, 31, 0, 0, 0), datetime(2021, 6, 30, 0, 0, 0)]
    )
    null_data = pd.DataFrame(columns=["UK"], data=[None, None], index=idx)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    # row-wise concatenation and works on older versions too.
    return pd.concat([df, null_data])
Beispiel #49
0
    def test_resample_dup_index(self):

        # GH 4812
        # dup columns with resample raising
        df = DataFrame(np.random.randn(4, 12),
                       index=[2000, 2000, 2000, 2000],
                       columns=[
                           Period(year=2000, month=i + 1, freq='M')
                           for i in range(12)
                       ])
        df.iloc[3, :] = np.nan
        result = df.resample('Q', axis=1)
        expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean()
        expected.columns = [
            Period(year=2000, quarter=i + 1, freq='Q') for i in range(4)
        ]
        assert_frame_equal(result, expected)
Beispiel #50
0
def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str,
                               pair: str) -> DataFrame:
    """
    Fills up missing data with 0 volume rows,
    using the previous close as price for "open", "high" "low" and "close", volume is set to 0

    :param dataframe: frame with a "date" column and "open", "high", "low",
        "close" and "volume" columns
    :param timeframe: timeframe string, converted to minutes with
        timeframe_to_minutes to build the resampling rule
    :param pair: pair name, only used in the log message
    :return: the resampled frame with "date" restored as a regular column
    """
    from freqtrade.exchange import timeframe_to_minutes

    ohlcv_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    timeframe_minutes = timeframe_to_minutes(timeframe)
    # Resample to create "NAN" values
    df = dataframe.resample(f'{timeframe_minutes}min',
                            on='date').agg(ohlcv_dict)

    # Forwardfill close for missing columns.
    # Series.ffill() is the supported spelling; fillna(method='ffill') is
    # deprecated in recent pandas releases.
    df['close'] = df['close'].ffill()
    # Use close for "open, high, low"
    df.loc[:, ['open', 'high', 'low']] = df[['open', 'high',
                                             'low']].fillna(value={
                                                 'open': df['close'],
                                                 'high': df['close'],
                                                 'low': df['close'],
                                             })
    df.reset_index(inplace=True)

    len_before = len(dataframe)
    len_after = len(df)
    # share of added rows relative to the input; 0 for an empty input
    pct_missing = (len_after -
                   len_before) / len_before if len_before > 0 else 0
    if len_before != len_after:
        message = (
            f"Missing data fillup for {pair}: before: {len_before} - after: {len_after}"
            f" - {round(pct_missing * 100, 2)}%")
        if pct_missing > 0.01:
            logger.info(message)
        else:
            # Don't be verbose if only a small amount is missing
            logger.debug(message)
    return df
def test_apply_columns_multilevel():
    """Resample-apply that picks the function per column of a 4-level MultiIndex (GH 16231)."""
    column_tuples = [("A", "a", "", "one"), ("B", "b", "i", "two")]
    cols = pd.MultiIndex.from_tuples(column_tuples)
    ind = date_range(start="2017-01-01", freq="15Min", periods=8)
    zeros = np.array([0] * 16).reshape(8, 2)
    df = DataFrame(zeros, index=ind, columns=cols)

    # columns whose last level is "one" are summed, the others averaged
    agg_dict = {}
    for col in df.columns:
        agg_dict[col] = np.sum if col[3] == "one" else np.mean

    result = df.resample("H").apply(lambda x: agg_dict[x.name](x))

    hourly_index = date_range(start="2017-01-01", freq="1H", periods=2)
    expected = DataFrame(
        2 * [[0, 0.0]],
        index=hourly_index,
        columns=pd.MultiIndex.from_tuples(column_tuples),
    )
    tm.assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
    # GH 15169
    index = date_range("1-1-2015", "12-31-15", freq="D")
    df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)

    def f(x):
        s = Series([1, 2], index=["a", "b"])
        return s

    expected = df.groupby(pd.Grouper(freq="M")).apply(f)

    result = df.resample("M").apply(f)
    tm.assert_frame_equal(result, expected)

    # A case for series
    expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
    result = df["col1"].resample("M").apply(f)
    tm.assert_series_equal(result, expected)
Beispiel #53
0
def test_resample_dtype_preservation():
    """Validate that ``ffill`` after resampling keeps ``int32`` (GH 12202)."""
    columns = {
        "date": pd.date_range(start="2016-01-01", periods=4, freq="W"),
        "group": [1, 1, 2, 2],
        "val": Series([5, 6, 7, 8], dtype="int32"),
    }
    df = DataFrame(columns).set_index("date")

    # same expectation for the plain and for the grouped upsampling;
    # the factories keep each resampler lazy until its own check runs
    resampler_factories = (
        lambda: df.resample("1D"),
        lambda: df.groupby("group").resample("1D"),
    )
    for make_resampler in resampler_factories:
        result = make_resampler().ffill()
        assert result.val.dtype == np.int32
Beispiel #54
0
def test_apply_with_mutated_index():
    # GH 15169
    index = pd.date_range('1-1-2015', '12-31-15', freq='D')
    df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)

    def f(x):
        s = Series([1, 2], index=['a', 'b'])
        return s

    expected = df.groupby(pd.Grouper(freq='M')).apply(f)

    result = df.resample('M').apply(f)
    assert_frame_equal(result, expected)

    # A case for series
    expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f)
    result = df['col1'].resample('M').apply(f)
    assert_series_equal(result, expected)
def test_agg_nested_dicts():
    """Nested renaming dicts must raise SpecificationError for every resample variant."""
    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
                                              names=["index", "date"])

    # the same 2-day binning reached four different ways
    cases = [
        df.resample("2D"),
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    msg = "nested renamer is not supported"
    spec_error = pd.core.base.SpecificationError

    # new name -> {column -> functions}
    outer_renamer = {
        "r1": {"A": ["mean", "sum"]},
        "r2": {"B": ["mean", "sum"]},
    }
    # column -> {new name -> functions}
    column_renamer = {
        "A": {"ra": ["mean", "std"]},
        "B": {"rb": ["mean", "std"]},
    }

    for t in cases:
        with pytest.raises(spec_error, match=msg):
            t.aggregate(outer_renamer)

    for t in cases:
        with pytest.raises(spec_error, match=msg):
            t[["A", "B"]].agg(column_renamer)

        with pytest.raises(spec_error, match=msg):
            t.agg(column_renamer)
    def hyperopt_loss_function(results: DataFrame, trade_count: int,
                               min_date: datetime, max_date: datetime, *args,
                               **kwargs) -> float:
        """
        Objective function, returns smaller number for more optimal results.

        Uses Sharpe Ratio calculation: per-trade profit (after a fixed
        slippage) is summed per day over the min_date..max_date range, and the
        negated ratio of mean to standard deviation, scaled by sqrt(365), is
        returned. Adds a 'profit_percent_after_slippage' column to `results`.
        """
        resample_freq = '1D'
        slippage_per_trade_ratio = 0.0005
        days_in_year = 365
        annual_risk_free_rate = 0.0
        daily_risk_free_rate = annual_risk_free_rate / days_in_year
        profit_column = 'profit_percent_after_slippage'

        # apply slippage per trade to profit_percent
        results.loc[:, profit_column] = \
            results['profit_percent'] - slippage_per_trade_ratio

        # every day between min_date and max_date, including days without trades
        all_days = date_range(start=min_date,
                              end=max_date,
                              freq=resample_freq,
                              normalize=True)

        daily_profit = results.resample(resample_freq, on='close_date').agg(
            {profit_column: sum})
        daily_profit = daily_profit.reindex(all_days).fillna(0)

        excess_profit = daily_profit[profit_column] - daily_risk_free_rate
        expected_returns_mean = excess_profit.mean()
        up_stdev = excess_profit.std()

        if up_stdev == 0:
            # Define high (negative) sharpe ratio to be clear that this is NOT optimal.
            sharp_ratio = -20.
        else:
            sharp_ratio = expected_returns_mean / up_stdev * math.sqrt(
                days_in_year)

        return -sharp_ratio
Beispiel #57
0
def resample_data(data: pd.DataFrame, frequency: str) -> pd.DataFrame:
    """Average `data` per day (frequency "daily", case-insensitive) or per hour.

    Rows without a "time" value are dropped, "time" is parsed to datetimes and
    the frame is resampled on it with ``mean()``. The result carries "time" as
    a string produced by ``date_to_str``. When the input has both "latitude"
    and "longitude", those columns are overwritten with the values of the
    first original row whose formatted time equals the result's time string.

    NOTE(review): when no original row matches, the time string itself is
    stored in the latitude/longitude column - confirm this is intended.
    """
    data = data.dropna(subset=["time"])
    data["time"] = pd.to_datetime(data["time"])
    data = data.sort_index(axis=0)

    has_coordinates = "latitude" in data.columns and "longitude" in data.columns
    kept_columns = ["time", "latitude", "longitude"] if has_coordinates else ["time"]
    original_df = data[kept_columns]

    resample_value = "24H" if frequency.lower() == "daily" else "1H"
    averages = pd.DataFrame(data.resample(resample_value, on="time").mean())

    # move the bin labels into a string "time" column and drop the index
    averages["time"] = averages.index
    averages["time"] = averages["time"].apply(date_to_str)
    averages = averages.reset_index(drop=True)

    # resample_value is always "1H" or "24H", so one of these two applies
    format_original_time = (date_to_str_hours
                            if resample_value == "1H" else date_to_str_days)
    original_df["time"] = original_df["time"].apply(format_original_time)

    if has_coordinates:

        def reset_latitude_or_longitude(time: str, field: str):
            date_row = pd.DataFrame(
                original_df.loc[original_df["time"] == time])
            if date_row.empty:
                return time
            first_match = date_row.iloc[0]
            return first_match["latitude" if field == "latitude" else "longitude"]

        for coordinate in ("latitude", "longitude"):
            averages[coordinate] = averages.apply(
                lambda row, field=coordinate: reset_latitude_or_longitude(
                    row["time"], field),
                axis=1)

    return averages
Beispiel #58
0
def make_cum_area(trans: pd.DataFrame,
                  account_id: str,
                  color_num: int = 0,
                  time_resolution: int = 0) -> go.Scatter:
    """Return a go Scatter with the cumulative total per time_resolution period
    for the selected account.

    `trans` is indexed by its "date" column, summed per period and accumulated;
    the cumulative "amount" is plotted against the period labels. The color is
    taken from `disc_colors[color_num]`, with a fixed fallback when that index
    does not exist.
    """
    resample_keyword = CONST["time_res_lookup"][time_resolution]["resample_keyword"]
    by_date = trans.set_index("date")

    bin_amounts = by_date.resample(resample_keyword).sum().cumsum()
    bin_amounts["date"] = bin_amounts.index
    bin_amounts["value"] = bin_amounts["amount"]
    bin_amounts["label"] = account_id

    try:
        marker_color = disc_colors[color_num]
    except IndexError:
        # don't ever run out of colors
        marker_color = "var(--Cyan)"

    # workaround for passing variables through layers of plotly
    bin_amounts["texttemplate"] = "%{customdata}"

    marker_style = {"symbol": "circle", "opacity": 1, "color": marker_color}
    line_style = {"width": 0.5, "color": marker_color}

    return go.Scatter(
        x=bin_amounts["date"],
        y=bin_amounts["value"],
        name=account_id,
        mode="lines+markers",
        marker=marker_style,
        customdata=bin_amounts["label"],
        # TODO: pass in unit for $
        hovertemplate="%{customdata}<br>%{y:$,.0f}<br>%{x}<extra></extra>",
        line=line_style,
        hoverlabel={"namelength": 15},
        stackgroup="one",
    )
Beispiel #59
0
def unique_devices_per_bin_size(df: pd.DataFrame, bin_size: str) -> List:
    """
    Utility function for the Device Events dataframe. Counts the distinct
    "device" values in each `bin_size` bin and returns json ready records
    in the following format
    [{'time': <bin label formatted with strftime("%s")>, 'devices': 0},{...}]
    """
    devices_per_bin = df.resample(bin_size)["device"].unique()

    # length of each per-bin array of unique values == number of devices
    device_counts = devices_per_bin.to_frame()["device"].str.len()

    table = device_counts.to_frame()
    table = table.rename(columns={"device": "devices"})
    table = table.reset_index()

    # unix time for D3 ("%s" is a platform-specific strftime directive)
    table["time"] = table["time"].map(lambda x: x.strftime("%s"))

    return table.to_dict("records")
def resample_data(data: pd.DataFrame, time: str):
    """Resample price rows into OHLC bars of width `time`.

    `data` is modified in place: its 'date' column is parsed to datetimes,
    becomes the index, and the frame is sorted by it. Bars containing any
    missing value are dropped and 'date' is returned as a regular column.
    """
    data['date'] = pd.to_datetime(data['date'])
    data.set_index('date', inplace=True)
    data.sort_index(inplace=True)

    # Converting to OHLC format
    ohlc_rules = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    bars = data.resample(time).apply(ohlc_rules)
    data_ohlc = bars.dropna().reset_index()

    print("Function resample_data done.\n")

    return data_ohlc