Example #1
0
    def test_asfreq(self):
        """Exercise ``Series.asfreq`` on a three-point datetime-indexed series.

        Covers the round trip to business-day frequency and back (string
        aliases, ``method='pad'`` and offset objects), the empty-series case,
        and upsampling to calendar days with ``fill_value``.
        """
        stamps = [datetime(2009, 10, 30),
                  datetime(2009, 11, 30),
                  datetime(2009, 12, 31)]
        ts = Series([0., 1., 2.], index=stamps)

        # Upsample to business days, then back to business month ends: the
        # original series must come back, using string aliases ...
        roundtrip = ts.asfreq('B').asfreq('BM')
        assert_series_equal(roundtrip, ts)

        # ... with padding applied during the upsample ...
        roundtrip = ts.asfreq('B', method='pad').asfreq('BM')
        assert_series_equal(roundtrip, ts)

        # ... and with offset objects instead of aliases.
        roundtrip = ts.asfreq(BDay()).asfreq(BMonthEnd())
        assert_series_equal(roundtrip, ts)

        # An empty slice gives an empty result that is not the input object.
        empty = ts[:0].asfreq('M')
        self.assertEqual(len(empty), 0)
        self.assertIsNot(empty, ts)

        # Calendar-day upsample: the inserted days carry the fill value.
        filled = ts.asfreq('D', fill_value=-1)
        counts = filled.value_counts().sort_index()
        expected = Series([60, 1, 1, 1],
                          index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
        assert_series_equal(counts, expected)
Example #2
0
    def test_tz_aware_asfreq(self, tz):
        """Smoke test: ``asfreq`` on a tz-aware daily series must not raise."""
        index = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz)
        values = np.random.randn(len(index))
        series = Series(values, index=index)

        # No assertion on the result: the call completing is the check.
        series.asfreq('T')
    def test_tz_aware_asfreq(self):
        """Smoke test: ``asfreq`` on a tz-aware daily series must not raise."""
        tz = self.tzstr("US/Eastern")
        index = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz)
        values = np.random.randn(len(index))
        series = Series(values, index=index)

        # No assertion on the result: the call completing is the check.
        series.asfreq("T")
Example #4
0
    def test_tz_aware_asfreq(self):
        """Smoke test: ``asfreq`` on a US/Eastern daily series must not raise."""
        index = date_range('2011-12-01', '2012-07-20', freq='D',
                           tz='US/Eastern')
        values = np.random.randn(len(index))
        series = Series(values, index=index)

        # No assertion on the result: the call completing is the check.
        series.asfreq('T')
Example #5
0
def test_asfreq_non_unique():
    """``asfreq`` on an index containing duplicates raises ``ValueError``."""
    # GH #1077
    days = date_range('1/1/2000', '2/29/2000')
    doubled = days.repeat(2).values  # every timestamp appears twice
    ts = Series(np.random.randn(len(doubled)), index=doubled)

    with pytest.raises(ValueError,
                       match='cannot reindex from a duplicate axis'):
        ts.asfreq('B')
Example #6
0
    def test_asfreq_ts(self):
        """``asfreq`` on period-indexed data keeps the length and converts the index.

        Checked for a Series and a DataFrame with ``how='end'`` and for the
        Series with ``how='start'``.
        """
        annual = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010')
        ts = Series(np.random.randn(len(annual)), index=annual)
        df = DataFrame(np.random.randn(len(annual), 3), index=annual)

        # Conversion anchored at the end of each annual period.
        ts_end = ts.asfreq('D', how='end')
        df_end = df.asfreq('D', how='end')
        end_index = annual.asfreq('D', how='end')
        assert len(ts_end) == len(ts)
        tm.assert_index_equal(ts_end.index, end_index)
        tm.assert_index_equal(df_end.index, end_index)

        # Conversion anchored at the start of each annual period.
        ts_start = ts.asfreq('D', how='start')
        start_index = annual.asfreq('D', how='start')
        assert len(ts_start) == len(ts)
        tm.assert_index_equal(ts_start.index, start_index)
Example #7
0
    def test_upsample_with_limit(self):
        """Limited forward-fill upsampling matches ``asfreq`` + ``reindex`` with the same limit."""
        years = period_range("1/1/2000", periods=5, freq="A")
        ts = Series(np.random.randn(len(years)), years)

        upsampled = ts.resample("M", fill_method="ffill", limit=2,
                                convention="end")
        # Reference: convert to monthly, then forward-fill onto the resampled
        # index with the same limit.
        monthly = ts.asfreq("M")
        reference = monthly.reindex(upsampled.index, method="ffill", limit=2)
        assert_series_equal(upsampled, reference)
Example #8
0
    def test_annual_upsample(self):
        """Upsampling annual period data agrees with the timestamp-based equivalents.

        Every combination of year-end month, target frequency, convention and
        fill method is compared with ``to_timestamp`` + ``asfreq``; then a
        DataFrame column is compared with the same Series, and a small
        explicit case is compared with ``asfreq`` + ``reindex``.
        """
        target_freqs = ["D", "B", "M"]
        conventions = ["start", "end"]
        fill_methods = ["ffill", "bfill"]

        for month in MONTHS:
            ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-%s" % month)

            for targ, conv, meth in product(target_freqs, conventions, fill_methods):
                result = ts.resample(targ, fill_method=meth, convention=conv)
                stamped = result.to_timestamp(targ, how=conv)
                expected = stamped.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # ``ts`` is the series left over from the last loop iteration.
        df = DataFrame({"a": ts})
        frame_result = df.resample("D", fill_method="ffill")
        series_result = df["a"].resample("D", fill_method="ffill")
        assert_series_equal(frame_result["a"], series_result)

        years = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample("M", fill_method="ffill")
        months = period_range("2000-01", "2003-12", freq="M")

        expected = ts.asfreq("M", how="start").reindex(months, method="ffill")
        assert_series_equal(result, expected)
Example #9
0
    def test_asfreq(self):
        """Round-trip a three-point series through weekday and end-of-month frequencies.

        The round trip is done with string aliases, with ``method='pad'`` and
        with ``datetools`` offset objects; each must reproduce the input values.
        """
        stamps = [datetime(2009, 10, 30),
                  datetime(2009, 11, 30),
                  datetime(2009, 12, 31)]
        ts = Series([0., 1., 2.], index=stamps)

        # String aliases.
        roundtrip = ts.asfreq('WEEKDAY').asfreq('EOM')
        self.assert_(np.array_equal(roundtrip, ts))

        # String aliases, padding while upsampling.
        roundtrip = ts.asfreq('WEEKDAY', method='pad').asfreq('EOM')
        self.assert_(np.array_equal(roundtrip, ts))

        # Offset objects.
        roundtrip = ts.asfreq(datetools.bday).asfreq(datetools.bmonthEnd)
        self.assert_(np.array_equal(roundtrip, ts))
Example #10
0
    def test_annual_upsample(self):
        """Upsampling annual period data agrees with the timestamp-based equivalents.

        Every combination of year-end month, target frequency, convention and
        fill method is compared with ``to_timestamp`` + ``asfreq``; then a
        DataFrame column is compared with the same Series, and a small
        explicit case is compared with ``asfreq`` + ``reindex``.
        """
        target_freqs = ['D', 'B', 'M']
        conventions = ['start', 'end']
        fill_methods = ['ffill', 'bfill']

        for month in MONTHS:
            ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

            for targ, conv, meth in product(target_freqs, conventions,
                                            fill_methods):
                result = ts.resample(targ, fill_method=meth, convention=conv)
                stamped = result.to_timestamp(targ, how=conv)
                expected = stamped.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # ``ts`` is the series left over from the last loop iteration.
        df = DataFrame({'a': ts})
        frame_result = df.resample('D', fill_method='ffill')
        series_result = df['a'].resample('D', fill_method='ffill')
        assert_series_equal(frame_result['a'], series_result)

        years = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample('M', fill_method='ffill')
        months = period_range('2000-01', '2003-12', freq='M')

        expected = ts.asfreq('M', how='start').reindex(months, method='ffill')
        assert_series_equal(result, expected)
Example #11
0
    def test_upsample_with_limit(self):
        """Limited forward-fill upsampling matches ``asfreq`` + ``reindex`` with the same limit."""
        years = period_range("1/1/2000", periods=5, freq="A")
        ts = Series(np.random.randn(len(years)), years)

        upsampled = ts.resample("M", convention="end").ffill(limit=2)
        # Reference: convert to monthly, then forward-fill onto the resampled
        # index with the same limit.
        monthly = ts.asfreq("M")
        reference = monthly.reindex(upsampled.index, method="ffill", limit=2)
        tm.assert_series_equal(upsampled, reference)
Example #12
0
    def test_upsample_with_limit(self):
        """Limited forward-fill upsampling matches ``asfreq`` + ``reindex`` with the same limit."""
        years = period_range('1/1/2000', periods=5, freq='A')
        ts = Series(np.random.randn(len(years)), years)

        upsampled = ts.resample('M', convention='end').ffill(limit=2)
        # Reference: convert to monthly, then forward-fill onto the resampled
        # index with the same limit.
        monthly = ts.asfreq('M')
        reference = monthly.reindex(upsampled.index, method='ffill', limit=2)
        assert_series_equal(upsampled, reference)
Example #13
0
    def test_upsample_with_limit(self):
        """Limited forward-fill upsampling matches ``asfreq`` + ``reindex`` with the same limit."""
        years = period_range('1/1/2000', periods=5, freq='A')
        ts = Series(np.random.randn(len(years)), years)

        upsampled = ts.resample('M', convention='end').ffill(limit=2)
        # Reference: convert to monthly, then forward-fill onto the resampled
        # index with the same limit.
        monthly = ts.asfreq('M')
        reference = monthly.reindex(upsampled.index, method='ffill', limit=2)
        assert_series_equal(upsampled, reference)
Example #14
0
    def test_asfreq_normalize(self):
        """``asfreq('D', normalize=True)`` yields the midnight-based daily index.

        The input index holds 20 daily timestamps at 09:30; the expected index
        holds the same 20 days without a time component. Checked for a Series
        and for a DataFrame built on the same index.
        """
        rng = date_range('1/1/2000 09:30', periods=20)
        # Expected index, built once and shared by both halves of the test.
        norm = date_range('1/1/2000', periods=20)

        vals = np.random.randn(20)
        ts = Series(vals, index=rng)

        result = ts.asfreq('D', normalize=True)
        expected = Series(vals, index=norm)

        assert_series_equal(result, expected)

        vals = np.random.randn(20, 3)
        ts = DataFrame(vals, index=rng)

        result = ts.asfreq('D', normalize=True)
        expected = DataFrame(vals, index=norm)

        assert_frame_equal(result, expected)
Example #15
0
    def test_asfreq(self):
        """Round-trip a three-point series through business-day frequency and back.

        The round trip is done with string aliases, with ``method='pad'`` and
        with ``datetools`` offset objects; the empty-series case is checked last.
        """
        stamps = [datetime(2009, 10, 30),
                  datetime(2009, 11, 30),
                  datetime(2009, 12, 31)]
        ts = Series([0., 1., 2.], index=stamps)

        # String aliases.
        roundtrip = ts.asfreq('B').asfreq('BM')
        self.assert_series_equal(roundtrip, ts)

        # String aliases, padding while upsampling.
        roundtrip = ts.asfreq('B', method='pad').asfreq('BM')
        self.assert_series_equal(roundtrip, ts)

        # Offset objects.
        roundtrip = ts.asfreq(datetools.bday).asfreq(datetools.bmonthEnd)
        self.assert_series_equal(roundtrip, ts)

        # An empty slice gives an empty result that is not the input object.
        empty = ts[:0].asfreq('M')
        self.assertEqual(len(empty), 0)
        self.assertIsNot(empty, ts)
    def test_asfreq_normalize(self):
        """``asfreq('D', normalize=True)`` yields the midnight-based daily index.

        The input index holds 20 daily timestamps at 09:30; the expected index
        holds the same 20 days without a time component. Checked for a Series
        and for a DataFrame built on the same index.
        """
        rng = date_range('1/1/2000 09:30', periods=20)
        # Expected index, built once and shared by both halves of the test.
        norm = date_range('1/1/2000', periods=20)

        vals = np.random.randn(20)
        ts = Series(vals, index=rng)

        result = ts.asfreq('D', normalize=True)
        expected = Series(vals, index=norm)

        assert_series_equal(result, expected)

        vals = np.random.randn(20, 3)
        ts = DataFrame(vals, index=rng)

        result = ts.asfreq('D', normalize=True)
        expected = DataFrame(vals, index=norm)

        assert_frame_equal(result, expected)
Example #17
0
    def test_asfreq_fillvalue(self):
        """``fill_value`` fills only the rows created by upsampling.

        Related to issue 3715. A missing value that was already present before
        the upsample must stay missing in the DataFrame result.
        """
        existing_gap = "2016-01-01 00:00:08"

        # Source data: ten points, two seconds apart.
        index = date_range("1/1/2016", periods=10, freq="2S")
        ts = Series(np.arange(len(index)), index=index)
        df = DataFrame({"one": ts})

        # Put a missing value into the frame before upsampling.
        df.loc[existing_gap, "one"] = None

        # Frame: fill_value versus fillna with the old gap restored.
        actual_df = df.asfreq(freq="1S", fill_value=9.0)
        expected_df = df.asfreq(freq="1S").fillna(9.0)
        expected_df.loc[existing_gap, "one"] = None
        tm.assert_frame_equal(expected_df, actual_df)

        # Series (which has no pre-existing gap): fill_value versus fillna.
        expected_series = ts.asfreq(freq="1S").fillna(9.0)
        actual_series = ts.asfreq(freq="1S", fill_value=9.0)
        tm.assert_series_equal(expected_series, actual_series)
    def test_mixed_freq_alignment(self):
        """An hourly series and its minutely interpolation start at the same x value."""
        hourly_index = date_range("2012-01-01 13:00", "2012-01-02", freq="H")
        hourly_values = np.random.randn(12)

        hourly = Series(hourly_values, index=hourly_index)
        minutely = hourly.asfreq("T").interpolate()

        ax = hourly.plot()
        minutely.plot(style="r")

        # Compare the first x coordinate of the two lines found on ``ax``.
        first_x = ax.lines[0].get_xdata()[0]
        second_x = ax.lines[1].get_xdata()[0]
        self.assertEqual(first_x, second_x)
    def test_mixed_freq_alignment(self):
        """An hourly series and its minutely interpolation start at the same x value."""
        hourly_index = date_range('2012-01-01 13:00', '2012-01-02', freq='H')
        hourly_values = np.random.randn(12)

        hourly = Series(hourly_values, index=hourly_index)
        minutely = hourly.asfreq('T').interpolate()

        ax = hourly.plot()
        minutely.plot(style='r')

        # Compare the first x coordinate of the two lines found on ``ax``.
        first_x = ax.lines[0].get_xdata()[0]
        second_x = ax.lines[1].get_xdata()[0]
        assert first_x == second_x
Example #20
0
    def test_mixed_freq_alignment(self):
        """An hourly series and its minutely interpolation start at the same x value."""
        hourly_index = date_range('2012-01-01 13:00', '2012-01-02', freq='H')
        hourly_values = np.random.randn(12)

        hourly = Series(hourly_values, index=hourly_index)
        minutely = hourly.asfreq('T').interpolate()

        ax = hourly.plot()
        minutely.plot(style='r')

        # Compare the first x coordinate of the two lines found on ``ax``.
        first_x = ax.lines[0].get_xdata()[0]
        second_x = ax.lines[1].get_xdata()[0]
        assert first_x == second_x
Example #21
0
    def test_resample_weekly_all_na(self):
        """Resampling ``W-WED`` data to ``W-THU`` is all-NA unless forward-filled."""
        wednesdays = date_range('1/1/2000', periods=10, freq='W-WED')
        ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

        # Without a fill method every resampled value is missing.
        unfilled = ts.resample('W-THU')
        self.assert_(unfilled.isnull().all())

        # With forward fill, all but the last bin match a padded asfreq.
        filled = ts.resample('W-THU', fill_method='ffill')[:-1]
        reference = ts.asfreq('W-THU', method='ffill')
        assert_series_equal(filled, reference)
    def test_resample_weekly_all_na(self):
        """Resampling ``W-WED`` data to ``W-THU`` is all-NA unless forward-filled."""
        wednesdays = date_range('1/1/2000', periods=10, freq='W-WED')
        ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

        # Without a fill method every resampled value is missing.
        unfilled = ts.resample('W-THU')
        self.assert_(unfilled.isnull().all())

        # With forward fill, all but the last bin match a padded asfreq.
        filled = ts.resample('W-THU', fill_method='ffill')[:-1]
        reference = ts.asfreq('W-THU', method='ffill')
        assert_series_equal(filled, reference)
Example #23
0
    def test_resample_weekly_all_na(self):
        """Resampling ``W-WED`` data to ``W-THU`` is all-NA unless forward-filled."""
        wednesdays = date_range("1/1/2000", periods=10, freq="W-WED")
        ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

        # Without a fill method every resampled value is missing.
        unfilled = ts.resample("W-THU")
        self.assert_(unfilled.isnull().all())

        # With forward fill, all but the last bin match a padded asfreq.
        filled = ts.resample("W-THU", fill_method="ffill")[:-1]
        reference = ts.asfreq("W-THU", method="ffill")
        assert_series_equal(filled, reference)
Example #24
0
    def test_resample_weekly_all_na(self):
        """Resampling ``W-WED`` data to ``W-THU`` is all-NA unless forward-filled."""
        wednesdays = date_range("1/1/2000", periods=10, freq="W-WED")
        ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

        # Without filling, every resampled value is missing.
        unfilled = ts.resample("W-THU").asfreq()
        assert unfilled.isna().all()

        # Forward-filled, all but the last bin match asfreq followed by ffill.
        filled = ts.resample("W-THU").asfreq().ffill()[:-1]
        reference = ts.asfreq("W-THU").ffill()
        assert_series_equal(filled, reference)
Example #25
0
 def make_ts(self, v):
     """Build a daily, padded Series from the x/y data nested inside ``v``.

     The index comes from ``v['data']['x']['data']`` (each entry parsed with
     ``parser.parse``) and the values from ``v['data']['y'][0]['data']`` (each
     entry converted with ``float``). The result is converted to daily
     frequency with ``method='pad'``.
     """
     timestamps = [parser.parse(raw) for raw in v['data']['x']['data']]
     values = [float(raw) for raw in v['data']['y'][0]['data']]
     return Series(values, index=timestamps).asfreq('D', method='pad')
Example #26
0
    def test_monthly_convention_span(self):
        """Daily ``resample().mean()`` of monthly periods matches a back-filled ``asfreq``."""
        months = period_range("2000-01", periods=3, freq="M")
        ts = Series(np.arange(3), index=months)

        # hacky way to get same thing
        days = period_range("2000-01-01", "2000-03-31", freq="D")
        month_ends = ts.asfreq("D", how="end")
        expected = month_ends.reindex(days).fillna(method="bfill")

        result = ts.resample("D").mean()

        tm.assert_series_equal(result, expected)
Example #27
0
    def test_mixed_freq_alignment(self):
        """An hourly series and its minutely interpolation start at the same x value."""
        import matplotlib.pyplot as plt
        hourly_index = date_range('2012-01-01 13:00', '2012-01-02', freq='H')
        hourly_values = np.random.randn(12)

        hourly = Series(hourly_values, index=hourly_index)
        minutely = hourly.asfreq('T').interpolate()

        # Close any open figures before plotting.
        plt.close('all')
        ax = hourly.plot()
        minutely.plot(style='r')

        # Compare the first x coordinate of the two lines found on ``ax``.
        first_x = ax.lines[0].get_xdata()[0]
        second_x = ax.lines[1].get_xdata()[0]
        self.assert_(first_x == second_x)
Example #28
0
def set_freq(price: pd.Series,
             dividend: pd.Series = None,
             freq: any = None,
             groupby: bool = True,
             method: any = 'mean',
             ffill: bool = True) -> tuple:
    """
    Set the frequency for the given price / dividend.

    Args:
        price: the price time-series to set the frequency for
        dividend: any dividend paid to set the frequency for
        freq: the frequency of periods for calculating returns; when None the
            series are left at their current frequency
        groupby: whether to use groupby or asfreq
        method: with ``groupby=True``, the aggregation passed to ``agg`` for
            each frequency group; with ``groupby=False``, the fill method
            passed to ``asfreq``. NOTE(review): pandas documents only
            'pad'/'ffill', 'backfill'/'bfill' or None as ``asfreq`` fill
            methods, so the default 'mean' looks unusable with
            ``groupby=False`` -- confirm against callers.
        ffill: whether to forward fill missing values (i.e., NaN values)

    Returns:
        a tuple of:
        - the price after setting the frequency
        - the dividend after setting the frequency (None when no dividend
          was given)

    """
    def _convert(series: pd.Series) -> pd.Series:
        # One conversion path shared by price and dividend so the two can
        # never drift apart.
        if groupby:
            # Aggregate every observation that falls into the same bucket.
            return series.groupby(pd.Grouper(freq=freq)).agg(method)
        # Re-index onto the new frequency; `method` only controls how the
        # newly created slots are filled.
        return series.asfreq(freq, method=method)

    if freq is not None:  # adjust the frequency of the data
        price = _convert(price)
        if dividend is not None:
            dividend = _convert(dividend)
    if ffill:  # forward fill missing values
        price = price.ffill()
        if dividend is not None:
            dividend = dividend.ffill()
    return price, dividend
Example #29
0
    def test_mixed_freq_alignment(self):
        """An hourly series and its minutely interpolation start at the same x value."""
        import matplotlib.pyplot as plt

        hourly_index = date_range("2012-01-01 13:00", "2012-01-02", freq="H")
        hourly_values = np.random.randn(12)

        hourly = Series(hourly_values, index=hourly_index)
        minutely = hourly.asfreq("T").interpolate()

        # Close any open figures before plotting.
        plt.close("all")
        ax = hourly.plot()
        minutely.plot(style="r")

        # Compare the first x coordinate of the two lines found on ``ax``.
        first_x = ax.lines[0].get_xdata()[0]
        second_x = ax.lines[1].get_xdata()[0]
        self.assert_(first_x == second_x)
Example #30
0
    def test_annual_upsample(self):
        """Forward-fill upsampling of annual period data.

        A DataFrame column must resample like the Series it was built from,
        and a small explicit case must match ``asfreq`` + ``reindex``.
        """
        ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-DEC")
        df = DataFrame({"a": ts})
        frame_result = df.resample("D", fill_method="ffill")
        series_result = df["a"].resample("D", fill_method="ffill")
        assert_series_equal(frame_result["a"], series_result)

        years = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample("M", fill_method="ffill")
        months = period_range("2000-01", "2003-12", freq="M")

        monthly_starts = ts.asfreq("M", how="start")
        expected = monthly_starts.reindex(months, method="ffill")
        assert_series_equal(result, expected)
Example #31
0
    def test_add_series_with_period_index(self):
        """Adding period-indexed series aligns on the index and rejects mixed frequencies."""
        years = pd.period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(years)), index=years)

        # Adding every other element leaves NaN at the skipped positions.
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(ts + ts[::2], expected)

        # The operand's order must not matter.
        tm.assert_series_equal(ts + _permute(ts[::2]), expected)

        # Operands with different period frequencies cannot be added.
        msg = r"Input has different freq=D from PeriodIndex\(freq=A-DEC\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts + ts.asfreq('D', how="end")
Example #32
0
    def test_add_series_with_period_index(self):
        """Adding period-indexed series aligns on the index and rejects mixed frequencies."""
        years = pd.period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(years)), index=years)

        # Adding every other element leaves NaN at the skipped positions.
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(ts + ts[::2], expected)

        # The operand's order must not matter.
        tm.assert_series_equal(ts + _permute(ts[::2]), expected)

        # Operands with different period frequencies cannot be added.
        msg = r"Input has different freq=D from PeriodIndex\(freq=A-DEC\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts + ts.asfreq('D', how="end")
Example #33
0
    def test_annual_upsample(self, simple_period_range_series):
        """Forward-fill upsampling of annual period data.

        A DataFrame column must resample like the Series it was built from,
        and a small explicit case must match ``asfreq`` + ``reindex``.
        """
        ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
        df = DataFrame({"a": ts})
        frame_result = df.resample("D").ffill()
        series_result = df["a"].resample("D").ffill()
        tm.assert_series_equal(frame_result["a"], series_result)

        years = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample("M").ffill()
        months = period_range("2000-01", "2003-12", freq="M")

        monthly_starts = ts.asfreq("M", how="start")
        expected = monthly_starts.reindex(months, method="ffill")
        tm.assert_series_equal(result, expected)
    def test_annual_upsample(self):
        """Forward-fill upsampling of annual period data.

        A DataFrame column must resample like the Series it was built from,
        and a small explicit case must match ``asfreq`` + ``reindex``.
        """
        ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')
        df = DataFrame({'a': ts})
        frame_result = df.resample('D', fill_method='ffill')
        series_result = df['a'].resample('D', fill_method='ffill')
        assert_series_equal(frame_result['a'], series_result)

        years = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample('M', fill_method='ffill')
        months = period_range('2000-01', '2003-12', freq='M')

        monthly_starts = ts.asfreq('M', how='start')
        expected = monthly_starts.reindex(months, method='ffill')
        assert_series_equal(result, expected)
Example #35
0
    def test_annual_upsample(self):
        """Forward-fill upsampling of annual period data.

        A DataFrame column must resample like the Series it was built from,
        and a small explicit case must match ``asfreq`` + ``reindex``.
        """
        ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')
        df = DataFrame({'a': ts})
        frame_result = df.resample('D', fill_method='ffill')
        series_result = df['a'].resample('D', fill_method='ffill')
        assert_series_equal(frame_result['a'], series_result)

        years = period_range('2000', '2003', freq='A-DEC')
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample('M', fill_method='ffill')
        months = period_range('2000-01', '2003-12', freq='M')

        monthly_starts = ts.asfreq('M', how='start')
        expected = monthly_starts.reindex(months, method='ffill')
        assert_series_equal(result, expected)
Example #36
0
    def test_align_series(self):
        """Period-indexed series align on addition and ``align``; mixed frequencies raise."""
        years = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(years)), index=years)

        # Adding every other element leaves NaN at the skipped positions.
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(ts + ts[::2], expected)

        # The operand's order must not matter.
        tm.assert_series_equal(ts + _permute(ts[::2]), expected)

        # it works!
        for join_kind in ('inner', 'outer', 'left', 'right'):
            ts.align(ts[::2], join=join_kind)

        # Operands with different period frequencies cannot be added.
        msg = r"Input has different freq=D from PeriodIndex\(freq=A-DEC\)"
        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
            ts + ts.asfreq('D', how="end")
Example #37
0
    def test_align_series(self):
        """Period-indexed series align on addition and ``align``; mixed frequencies raise."""
        years = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(years)), index=years)

        # Adding every other element leaves NaN at the skipped positions.
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(ts + ts[::2], expected)

        # The operand's order must not matter.
        tm.assert_series_equal(ts + _permute(ts[::2]), expected)

        # it works!
        for join_kind in ('inner', 'outer', 'left', 'right'):
            ts.align(ts[::2], join=join_kind)

        # Operands with different period frequencies cannot be added.
        msg = r"Input has different freq=D from PeriodIndex\(freq=A-DEC\)"
        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
            ts + ts.asfreq('D', how="end")
Example #38
0
    def getFund(id):
        # Load the `profit` rows for fund `id`, convert them to a month-frequency
        # padded series, print a summary, and return a Series of the values
        # produced by dayProfit() indexed by date strings.
        # Python 2 code (print statements, Series.iteritems). `cur`,
        # `dayProfit` and `printResult` are defined outside this block.
        #
        # NOTE(review): the query is built by string concatenation; if `id` can
        # come from untrusted input, switch to the driver's parameterized form.
        # `count` is not used afterwards.
        count = cur.execute('SELECT * FROM profit where id = ' + str(id))
        results = cur.fetchall()
        valueYuan = []
        valueYuanP = []
        dateYuan = []
        for r1 in results:
            # Column 1 is used as the name, column 2 as the value (scaled by
            # 1/100), column 4 as a 'Y-M-D' style date.
            # NOTE(review): under Python 2, `/` floors when r1[2] is an int --
            # confirm the column type.
            name = r1[1]
            valueYuan.append(r1[2] / 100)
            dateYuan.append(
                datetime.datetime(int(str(r1[4]).split('-')[0]),
                                  int(str(r1[4]).split('-')[1]),
                                  int(str(r1[4]).split('-')[2])))

        tsYuan = Series(valueYuan, index=dateYuan)
        # Convert to month frequency, padding from the previous observation.
        tsfYuanm = tsYuan.asfreq('M', method='pad')
        print str(id) + ": " + str((len(tsfYuanm))) + "months ",
        dateP = []
        for (k, d) in tsfYuanm.iteritems():
            #print k, d
            valueYuanP.append(d)
            # Keep only the date part of the timestamp's string form.
            dateP.append(str(k).split(" ")[0])
        valueYuanP = dayProfit(valueYuanP)
        print "start from: " + dateP[0] + "  to :" + dateP[len(dateP) - 1]
        # `name` holds the value from the last row; it is unbound (NameError)
        # when the query returned no rows.
        print name + ":  ",
        period = dateP[0] + " to " + dateP[len(dateP) - 1]
        printResult(name, period, valueYuanP)
        tsfYuanmP = Series(valueYuanP, index=dateP)
        # Only referenced by the disabled plotting snippet below.
        valueYuanPrice = [x + 1 for x in valueYuan]
        '''plot full period
        plt.plot(dateYuan,valueYuanPrice)
        plt.title(str(id))
        fname=str(id)+'.png'
        plt.savefig(fname, dpi=75)
        #plt.show()
        '''
        return tsfYuanmP
Example #39
0
# Notebook-style walkthrough of Period / PeriodIndex frequency conversion.
# Bare expressions are there for interactive display; `index` is defined
# outside this excerpt.
index

# Annual period with a December year end, converted to the monthly period at
# its start and at its end.
p = pd.Period('2007', freq='A-DEC')
p.asfreq('M', how='start')
p.asfreq('M', how='end')

# Same conversions for a fiscal year ending in June.
p = pd.Period('2007', freq='A-JUN')
p.asfreq('M', 'start')

p.asfreq('M', 'end')
# Monthly period converted to the June-ending annual frequency.
p = pd.Period('Aug-2007', 'M')
p.asfreq('A-JUN')
# The same conversions applied to a period-indexed Series.
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts
ts.asfreq('M', how='start')
ts.asfreq('B', how='end')

# Quarterly period with the fiscal year ending in January.
p = pd.Period('2012Q4', freq='Q-JAN')
p
p.asfreq('D', 'start')
p.asfreq('D', 'end')
# Last business day of the quarter, minus one business day, taken at its first
# minute, plus 16 * 60 minutes: 16:00 on the second-to-last business day.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
p4pm.to_timestamp()
# The same arithmetic applied to a whole quarterly PeriodIndex, then used as
# the timestamp index of the series.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
ts
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rng.to_timestamp()
ts
# `ts1` and `ts2` are defined outside this excerpt. Forward-fill ts1 onto
# ts2's index before adding the two.
j = ts2 + ts1.reindex(ts2.index, method='ffill')
print(j)

# Quarterly series whose fiscal year ends in September.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
print(gdp)

# Annual series whose year ends in December.
infl = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))
print(infl)  # clearly a different time frequency from gdp

# Unlike timestamp-indexed series, two period-indexed series with different
# frequencies must be converted explicitly.

# Convert the annual series to the quarterly frequency.
infl_q = infl.asfreq('Q-SEP', how='end')
print(infl_q)
# Align to gdp's index and forward-fill the missing values.
k = infl_q.reindex(gdp.index, method='ffill')
print(k)

# Time-of-day selection.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
# One session sampled every minute.
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
# The same minutes on the following 3 business days are appended (4 days in total).
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts.head())
print(ts.tail())
# Indexing with a datetime.time object extracts the values at that time of day.
print(ts[time(10, 0)])  # extract the 10:00 values
Example #41
0
# `ts1` is defined outside this excerpt. Upsample it to business days with
# forward fill.
print(ts1.resample('B').ffill())

# An irregularly spaced second series.
dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',
                          '2012-6-21', '2012-6-22', '2012-6-29'])
ts2 = Series(np.random.randn(6), index=dates)
print(ts2)

# Forward-fill ts1 onto ts2's dates, on its own and then added to ts2.
print(ts1.reindex(ts2.index, method='ffill'))
print(ts2 + ts1.reindex(ts2.index, method='ffill'))

# Quarterly (September fiscal year end) and annual (December year end)
# period-indexed series.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
inf1 = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))

# Convert the annual series to the quarterly frequency, then forward-fill it
# onto gdp's index.
inf1_q = inf1.asfreq('Q-SEP', how='end')
print(inf1_q)
print(inf1_q.reindex(gdp.index, method='ffill'))

# Minute-by-minute timestamps for one day, repeated on the following 3
# business days.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])

ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts)
# Selection by time of day: indexing with a time object, at_time, and a
# between_time window.
print(ts[time(10, 0)])
print(ts.at_time(time(10, 0)))
print(ts.between_time(time(10, 0), time(10, 1)))

# Blank out randomly chosen positions (all but 700 of them) in a copy to get an
# irregular series.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
irr_ts[indexer] = np.nan
# #### # PeriodIndex objects are handled the same way

# In[ ]:


# Annual periods with a December year end; the trailing `rng` and `ts` are
# there for notebook display.
rng = pd.period_range('2006', '2009', freq='A-DEC'); rng
ts = Series(np.random.randn(len(rng)), index=rng)
ts


# In[ ]:


# The bare `ts.asfreq` only references the bound method (notebook display);
# the call below performs the conversion.
ts.asfreq
ts.asfreq('M', how='start')


# In[ ]:


ts.asfreq('B', how='end')


# ### 10.5.2 Quarterly period frequencies
#   * The meaning depends on when the fiscal year ends
#   * 12 possible quarterly frequencies: Q-JAN ~ Q-DEC
#     - the month named after "Q-" is the last month of the fourth quarter

# In[ ]:
Example #43
0
def _freq_to_period(x: pd.Series, freq: Frequency = Frequency.YEAR):
    """
    Forward-fill x onto a regular grid and report how many data points make up one cycle of length freq.

    The frequency of x is inferred from its DatetimeIndex and translated by statsmodels into a base period. That base
    period selects the grid x is re-indexed to (always with forward fill) and, together with freq, the number of
    points per cycle that is returned:

    base period 7 or unknown: grid 'D'; YEAR -> 365, QUARTER -> 91, MONTH -> 30, anything else -> 7
    base period 5: YEAR / QUARTER / MONTH -> grid 'D' and 365 / 91 / 30; anything else -> grid 'B' and 5
    base period 52: grid 'W'; YEAR -> 52, QUARTER -> 13, MONTH -> 4; anything else raises
    base period 12: grid 'M'; YEAR -> 12, QUARTER -> 3; anything else raises
    any other base period: x is returned unchanged together with that base period

    :param x: series with a pandas DatetimeIndex
    :param freq: length of time over which x's cycles repeat (e.g. a yearly sales cycle)
    :return: tuple of (re-indexed series, number of data points in a period)
    :raises MqValueError: if x has no DatetimeIndex, or freq is incompatible with a weekly or monthly series
    """
    if not isinstance(x.index, pd.DatetimeIndex):
        raise MqValueError("Series must have a pandas.DateTimeIndex.")

    pfreq = getattr(x.index, 'inferred_freq', None)
    try:
        period = statsmodels.tsa.seasonal.freq_to_period(pfreq)
    except (ValueError, AttributeError):
        # The frequency could not be translated; handled like daily data below.
        period = None

    def _incompatible():
        # Built lazily so the message is only formatted when it is raised.
        return MqValueError(
            f'Frequency {freq.value} not compatible with series with frequency {pfreq}.'
        )

    if period in [7, None]:  # daily
        daily = x.asfreq('D', method='ffill')
        if freq == Frequency.YEAR:
            points = 365
        elif freq == Frequency.QUARTER:
            points = 91
        elif freq == Frequency.MONTH:
            points = 30
        else:
            points = 7
        return daily, points

    if period == 5:  # business day
        if freq == Frequency.YEAR:
            grid, points = 'D', 365
        elif freq == Frequency.QUARTER:
            grid, points = 'D', 91
        elif freq == Frequency.MONTH:
            grid, points = 'D', 30
        else:  # freq == Frequency.WEEKLY:
            grid, points = 'B', 5
        return x.asfreq(grid, method='ffill'), points

    if period == 52:  # weekly frequency
        weekly = x.asfreq('W', method='ffill')
        if freq == Frequency.YEAR:
            return weekly, period
        if freq == Frequency.QUARTER:
            return weekly, 13
        if freq == Frequency.MONTH:
            return weekly, 4
        raise _incompatible()

    if period == 12:  # monthly frequency
        monthly = x.asfreq('M', method='ffill')
        if freq == Frequency.YEAR:
            return monthly, period
        if freq == Frequency.QUARTER:
            return monthly, 3
        raise _incompatible()

    return x, period
Example #44
0
def _spot_tags(reg_key, zone, product, inst_type):
    """Build the base OpenTSDB tag set shared by every spot-price data point."""
    return {
        'cloud': 'aws',
        'region': reg_key,
        'zone': zone,
        'product': product,
        'inst_type': inst_type,
        'units': 'USD',
    }


def main(st, et):
    """Fetch EC2 spot-price history for every region and send it to OpenTSDB.

    For each region reported by the botocore EC2 service, the
    DescribeSpotPriceHistory operation is paged through.  Every record is sent
    through ``common.otsdb_send`` twice: as ``price_spot`` and as ``price``
    with an extra ``price_type=spot`` tag.  Records that match an entry of the
    on-demand price table are additionally collected per
    region/zone/product/instance type, forward-filled to a 1-minute grid and
    sent again.

    Args:
        st: Start of the query window.  A falsy value selects "now" adjusted
            by ``common.DEFAULT_LOOKBACK_MINUTES``.  Must provide
            ``.format(_FMT)`` (presumably an ``arrow.Arrow`` - confirm with
            the caller).
        et: End of the query window.  A falsy value selects "now".  Same type
            expectations as ``st``.
    """
    # NOTE(review): this relies on arrow's replace() accepting a plural unit
    # to shift the time, which looks like the legacy arrow API - confirm
    # against the installed arrow version.
    start_time = st if st else arrow.utcnow().replace(minutes=common.DEFAULT_LOOKBACK_MINUTES)
    end_time = et if et else arrow.utcnow()
    start_str = start_time.format(_FMT)
    end_str = end_time.format(_FMT)

    session = botocore.session.get_session()
    ec2 = session.get_service('ec2')
    operation = ec2.get_operation('DescribeSpotPriceHistory')

    # vals / tss hold, per region -> zone -> product -> instance type, the
    # collected spot prices and their timestamps (parallel lists).
    # NOTE(review): zone/product/instance keys are taken verbatim from the
    # lookup tables, while records below are looked up with normalized names;
    # presumably the tables are already normalized - confirm.
    vals = {}
    tss = {}
    print('Preparing...')
    for region in AWS_ON_DEMAND_PRICES:
        reg_key = region.replace('-', '_')
        region_vals = vals.setdefault(reg_key, {})
        region_tss = tss.setdefault(reg_key, {})
        for zone in AWS_REGIONS_TO_ZONES[region]:
            zone_vals = region_vals.setdefault(zone, {})
            zone_tss = region_tss.setdefault(zone, {})
            for product in AWS_ON_DEMAND_PRICES[region]:
                if not AWS_ON_DEMAND_PRICES[region][product]:
                    print("WARNING: Empty %s:%s" % (region, product))
                    continue
                product_vals = zone_vals.setdefault(product, {})
                product_tss = zone_tss.setdefault(product, {})
                for inst_type in common.AWS_ON_DEMAND_PRICES[region][product]:
                    product_vals[inst_type] = []
                    product_tss[inst_type] = []

    for region in ec2.region_names:
        # Derive the key from the region being processed so the fill step
        # below never works on a key left over from an earlier iteration.
        reg_key = region.replace('-', '_')
        cnt = 0
        next_token = None
        print('Collecting spot prices from region: %s for %s to %s' % (region, start_str, end_str))
        sys.stdout.flush()
        endpoint = ec2.get_endpoint(region)
        while True:
            # Every page, including the first, is bounded by both ends of the
            # requested window.
            params = {'start_time': start_str, 'end_time': end_str}
            if next_token:
                params['next_token'] = next_token
            _, data = operation.call(endpoint, **params)
            next_token = data.get('NextToken')
            spot_data = data.get('SpotPriceHistory', [])
            sys.stdout.flush()
            for position, d in enumerate(spot_data):
                ts = common.ts_from_aws(d)

                if position == 0:
                    print("Fetched %s records starting with %s" % (len(spot_data), d['Timestamp']))

                # Example record:
                # {u'Timestamp': '2014-04-10T23:49:21.000Z', u'ProductDescription': 'Linux/UNIX (Amazon VPC)', u'InstanceType': 'hi1.4xlarge', u'SpotPrice': '0.128300', u'AvailabilityZone': 'us-east-1b'}
                d['Region'] = reg_key
                d['InstanceTypeNorm'] = d['InstanceType'].replace('.', '_')

                value = d['SpotPrice']

                zone = d['AvailabilityZone'].replace('-', '_')
                product = d['ProductDescription'].replace('-', '_').replace('(', '').replace(')', '_').replace(' ', '_').replace('/', '_')
                if product.endswith('_'):
                    product = product[:-1]
                inst_type = d['InstanceTypeNorm'].replace('-', '_')

                tags = _spot_tags(reg_key, zone, product, inst_type)
                try:
                    value_list = vals[reg_key][zone][product][inst_type]
                    ts_list = tss[reg_key][zone][product][inst_type]
                except KeyError:
                    print("No on-demand info for %s/%s/%s/%s" % (reg_key, zone, product, inst_type))
                else:
                    value_list.append(value)
                    ts_list.append(ts)

                common.otsdb_send('price_spot', value, tags, ts, False)
                common.otsdb_send('price', value, dict(tags, price_type='spot'), ts, False)
                cnt += 1

            if not next_token:
                break
        print("Found %s price points" % cnt)

        # Regions without an on-demand entry have nothing to fill.
        region_tss = tss.get(reg_key, {})
        for zone in region_tss:
            for product in region_tss[zone]:
                for inst_type in region_tss[zone][product]:
                    ts_list = region_tss[zone][product][inst_type]
                    if not ts_list:
                        print("No spot info for %s/%s/%s/%s." % (reg_key, zone, product, inst_type))
                        continue
                    print("%s/%s/%s/%s" % (reg_key, zone, product, inst_type))

                    # Sort timestamps and prices together so each price stays
                    # attached to its own timestamp.
                    points = sorted(
                        zip(ts_list, vals[reg_key][zone][product][inst_type]),
                        key=lambda point: point[0],
                    )
                    tss_dt = to_datetime([point[0] for point in points], unit='s')
                    s1 = Series([point[1] for point in points], tss_dt)

                    # Single-point series were already sent above; there is
                    # nothing to forward-fill.
                    if len(s1) <= 1:
                        continue

                    tags = _spot_tags(reg_key, zone, product, inst_type)
                    # Separate dict so 'price_spot' never carries price_type.
                    price_tags = dict(tags, price_type='spot')
                    s2 = s1.asfreq('1Min', method='ffill')
                    for (dt, value) in s2.iteritems():
                        ts = arrow.Arrow.fromdatetime(dt).timestamp
                        common.otsdb_send('price_spot', value, tags, ts, False)
                        common.otsdb_send('price', value, price_tags, ts, False)
        sys.stdout.flush()
Example #45
0
# Six irregularly spaced dates in June 2012 used as the index of ts2.
dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',
                          '2012-6-21', '2012-6-22', '2012-6-29'])
ts2 = Series(np.random.randn(6), index=dates)
print(ts2)

# Add the most recent (forward-filled) values of ts1 to ts2 while keeping
# ts2's index.
# First reindex ts1 onto ts2's index, forward-filling the values ...
print(ts1.reindex(ts2.index, method='ffill'))
# ... then add the two series.
print(ts2 + ts1.reindex(ts2.index, method='ffill'))


# Use Period to represent time spans.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))

infl = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))

print(gdp)
print(infl)

# Unlike Timestamp-indexed series, Period-indexed series must be converted
# explicitly; how='end' maps each annual period to the Q-SEP quarter in which
# it ends.
infl_q = infl.asfreq('Q-SEP', how='end')
print(infl_q)

# Reindex onto gdp's quarters, forward-filling the gaps.
print(infl_q.reindex(gdp.index, method='ffill'))


Example #46
0
# 11.5.1 Period frequency conversion
# An annual period with a December year end, converted to its first and last
# month.
p = pd.Period('2007', freq='A-DEC')
print(p)
print(p.asfreq('M', how='start'))
print(p.asfreq('M', how='end'))
# Same year label, but with a June year end; `how` is passed positionally.
p = pd.Period('2007', freq='A-JUN')
print(p)
print(p.asfreq('M', 'start'))
print(p.asfreq('M', 'end'))

# Convert a monthly period to the June-year-end annual period containing it.
p = pd.Period('Aug-2007', 'M')
print(p.asfreq('A-JUN'))
# The same kind of conversion applied to a whole series indexed by annual
# periods.
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
print(ts)
print(ts.asfreq('M', how='start'))
print(ts.asfreq('B', how='end'))

# 11.5.2 Quarterly period frequencies
p = pd.Period('2012Q4', freq='Q-JAN')
print(p)
print(p.asfreq('D', 'start'))
print(p.asfreq('D', 'end'))

# Last business day of the quarter, minus one business day, converted to the
# first minute of that day, plus 16 * 60 minutes (i.e. 16:00).
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
print(p4pm)

# A series indexed by quarterly periods with the same Q-JAN convention.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
print(ts)
# The bare expressions in this fragment are notebook-cell outputs; when run
# as a plain script their values are computed and discarded.
p.asfreq('M', how='end')

# In[28]:

# Convert a monthly period to the June-year-end annual period containing it.
p = pd.Period('2007-08', 'M')
p.asfreq('A-JUN')

# In[35]:

# NOTE(review): this builds a DatetimeIndex (date_range), not a PeriodIndex.
rng = pd.date_range('2007', '2010', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts

# In[36]:

# NOTE(review): pandas documents `how` as applying to PeriodIndex only, so it
# presumably has no effect on the DatetimeIndex-backed `ts` built above -
# confirm whether pd.period_range was intended.
ts.asfreq('M', how='start')

# In[37]:

p = pd.Period('2012Q4', freq='Q-JAN')
p

# In[38]:

# First day of the quarter.
p.asfreq('D', 'start')

# In[39]:

# Last business day of the quarter, minus one business day, converted to the
# first minute of that day, plus 16 * 60 minutes (i.e. 16:00).
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
Example #48
0
# print(ts1)

# Six irregularly spaced dates in June 2012 used as the index of ts2.
dates = pd.DatetimeIndex([
    '2012-6-12', '2012-6-17', '2012-6-18', '2012-6-21', '2012-6-22',
    '2012-6-29'
])
ts2 = Series(np.random.randn(6), index=dates)
# print(ts2)
# print(ts1.reindex(ts2.index).ffill())
# print(ts2 + ts1.reindex(ts2.index, method='ffill'))

# `gdp` is indexed by quarterly periods (Q-SEP), `inf1` by annual periods
# (A-DEC).
gdp = Series([1.78, 1.95, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
inf1 = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))
# Convert the annual periods to Q-SEP quarters; how='end' selects the quarter
# in which each year ends.
inf1_q = inf1.asfreq('Q-SEP', how='end')

# print(gdp)
# print(inf1)
# print(inf1_q)
#
# print(inf1_q.reindex(gdp.index, method='ffill'))

# Minute-frequency timestamps from 09:30 to 15:59 on 2012-06-01 ...
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
# ... followed by the same minutes shifted by 1, 2 and 3 business days.
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
# print(rng)

# Float series counting 0..len(rng)-1 over those timestamps.
ts = Series(np.arange(len(rng), dtype=float), index=rng)

# print(ts)
#
Example #49
0
# `index` is defined earlier in the script, outside this fragment.
print(index)

# An annual period with a December year end, converted to its first and last
# month.
p = pd.Period('2007', freq='A-DEC')
print(p.asfreq('M', how='start'))
print(p.asfreq('M', how='end'))

# Same year label, but with a June year end.
p = pd.Period('2007', freq='A-JUN')
print(p.asfreq('M', how='start'))
print(p.asfreq('M', how='end'))

# Convert a monthly period to the June-year-end annual period containing it.
p = pd.Period('2007-08', 'M')
print(p.asfreq('A-JUN'))
# The same kind of conversion applied to a whole series indexed by annual
# periods.
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
print(ts)
print(ts.asfreq('M', how='start'))
print(ts.asfreq('M', how='end'))

# Quarterly period with a January year end, and its first and last day.
p = pd.Period('2014Q4', freq='Q-JAN')
print(p)
print(p.asfreq('D', 'start'))
print(p.asfreq('D', 'end'))

# Last business day of the quarter, minus one business day, converted to the
# first minute of that day, plus 16 * 60 minutes (i.e. 16:00).
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
print(p4pm)
print(p4pm.to_timestamp())

# Apply the same 16:00 computation to every quarter of a PeriodIndex, then
# convert the resulting periods to timestamps.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts_index = new_rng.to_timestamp()
Example #50
0
    ts2 = ts1.resample('B').ffill()
    # print(ts2)
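    # To add the "most recent" (i.e. forward-filled) values of ts1 to ts2, one
    # option is to resample both to a regular frequency and then add them; but
    # if ts2's date index must be preserved, reindex is the better solution.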
    # print(ts1.reindex(ts2.index).ffill())
    # print(ts2+ts1.reindex(ts2.index).ffill())

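    # Using Period
    # Period (which represents a time span) offers another way to work with
    # time series of different frequencies, especially financial or economic
    # series with annual or quarterly frequency that follow special conventions.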
    gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
                 index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
    infl = Series([0.025, 0.045, 0.037, 0.04],
                  index=pd.period_range('1982', periods=4, freq='A-DEC'))
    # print(gdp,'\n')
    # print(infl)
    # Unlike Timestamp-indexed series, operations between two Period-indexed
    # series of different frequencies require an explicit conversion.
    infl_q = infl.asfreq('Q-SEP', how='E')
    # print(infl_q)
    # This series can now be reindexed (forward-filling to match gdp).
    # print(infl_q.reindex(gdp.index).ffill())

    # Time of day and "most recent" data selection
    # Build a date range and time series spanning one trading day.
    rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
    # Append the same minutes for the next three business days (four days in
    # total, values between 9:30 and 15:59).
    rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
    ts = Series(np.arange(len(rng), dtype=float), index=rng)
    # print(ts)
    # Indexing with a Python datetime.time object extracts the values at that
    # time of day.
    # print(ts[time(10, 0)])
    # Under the hood this uses the instance method at_time (available on time
    # series and on DataFrame objects alike).
    # print(ts.at_time(time(10,0)))