Esempio n. 1
0
def __date_control_quarter(x):
    """Return (start, end) quarter-Period pairs for each contiguous run of
    non-null observations in *x* (a Series/DataFrame with a date index).

    Two consecutive observation dates closer than 95 days are treated as
    belonging to the same run; a gap of 95+ days starts a new run.
    """
    # Dates that carry data, as a Series so we can diff/shift them.
    dates = x.dropna().index.to_series()

    # start[i] is True when dates[i] continues the previous run (gap < 95
    # days); end[i] is True when dates[i+1] continues the current run.
    # 95 days is roughly one quarter plus slack.
    start = dates.diff() < offset.Day(95)
    end = dates.shift(-1) - dates < offset.Day(95)

    # Run boundaries are the dates where the continuation test fails
    # (the first diff is NaT, so the very first date always starts a run).
    start = dates[~start].values
    end = dates[~end].values

    # Express both boundary sets as quarterly periods.
    start = pd.PeriodIndex(start, freq='Q')
    end = pd.PeriodIndex(end, freq='Q')

    return list(zip(start, end))
Esempio n. 2
0
    def test_get_freq_code(self):
        """get_freq_code resolves strings, tuples and offsets to (code, mult)."""
        # (input, expected (freq code, multiplier)) pairs covering each form.
        cases = [
            # frequency strings
            ('A', (frequencies.get_freq('A'), 1)),
            ('3D', (frequencies.get_freq('D'), 3)),
            ('-2M', (frequencies.get_freq('M'), -2)),
            # (freqstr, multiplier) tuples
            (('D', 1), (frequencies.get_freq('D'), 1)),
            (('A', 3), (frequencies.get_freq('A'), 3)),
            (('M', -2), (frequencies.get_freq('M'), -2)),
            # an already-numeric tuple passes through unchanged
            ((1000, 1), (1000, 1)),
            # offset objects
            (offsets.Day(), (frequencies.get_freq('D'), 1)),
            (offsets.Day(3), (frequencies.get_freq('D'), 3)),
            (offsets.Day(-2), (frequencies.get_freq('D'), -2)),
            (offsets.MonthEnd(), (frequencies.get_freq('M'), 1)),
            (offsets.MonthEnd(3), (frequencies.get_freq('M'), 3)),
            (offsets.MonthEnd(-2), (frequencies.get_freq('M'), -2)),
            (offsets.Week(), (frequencies.get_freq('W'), 1)),
            (offsets.Week(3), (frequencies.get_freq('W'), 3)),
            (offsets.Week(-2), (frequencies.get_freq('W'), -2)),
            # anchored weekly offsets; Monday is weekday=0
            (offsets.Week(weekday=1), (frequencies.get_freq('W-TUE'), 1)),
            (offsets.Week(3, weekday=0), (frequencies.get_freq('W-MON'), 3)),
            (offsets.Week(-2, weekday=4), (frequencies.get_freq('W-FRI'), -2)),
        ]
        for arg, expected in cases:
            self.assertEqual(frequencies.get_freq_code(arg), expected)
Esempio n. 3
0
    def test_get_freq_code(self):
        """get_freq_code handles freq strings, tuples and offsets alike."""
        get_freq = frequencies.get_freq
        get_code = frequencies.get_freq_code

        # frequency strings
        assert get_code('A') == (get_freq('A'), 1)
        assert get_code('3D') == (get_freq('D'), 3)
        assert get_code('-2M') == (get_freq('M'), -2)

        # (freqstr, multiplier) tuples
        assert get_code(('D', 1)) == (get_freq('D'), 1)
        assert get_code(('A', 3)) == (get_freq('A'), 3)
        assert get_code(('M', -2)) == (get_freq('M'), -2)

        # an already-numeric tuple passes through untouched
        assert get_code((1000, 1)) == (1000, 1)

        # offset objects
        assert get_code(offsets.Day()) == (get_freq('D'), 1)
        assert get_code(offsets.Day(3)) == (get_freq('D'), 3)
        assert get_code(offsets.Day(-2)) == (get_freq('D'), -2)

        assert get_code(offsets.MonthEnd()) == (get_freq('M'), 1)
        assert get_code(offsets.MonthEnd(3)) == (get_freq('M'), 3)
        assert get_code(offsets.MonthEnd(-2)) == (get_freq('M'), -2)

        assert get_code(offsets.Week()) == (get_freq('W'), 1)
        assert get_code(offsets.Week(3)) == (get_freq('W'), 3)
        assert get_code(offsets.Week(-2)) == (get_freq('W'), -2)

        # anchored weekly offsets; Monday is weekday=0
        assert get_code(offsets.Week(weekday=1)) == (get_freq('W-TUE'), 1)
        assert get_code(offsets.Week(3, weekday=0)) == (get_freq('W-MON'), 3)
        assert get_code(offsets.Week(-2, weekday=4)) == (get_freq('W-FRI'), -2)
Esempio n. 4
0
 def test_construct_timestamp_preserve_original_frequency(self):
     # GH 22311: re-wrapping a Timestamp in Timestamp() must keep its freq.
     # Accessing .freq is deprecated, hence the expected FutureWarning.
     with tm.assert_produces_warning(FutureWarning,
                                     match="The 'freq' argument"):
         result = Timestamp(Timestamp("2010-08-08", freq="D")).freq
     expected = offsets.Day()
     assert result == expected
Esempio n. 5
0
    def test_with_local_timezone_pytz(self):
        """Resampling a tz-converted series to 'period' labels in local time.

        see gh-5430
        """
        local_timezone = pytz.timezone('America/Los_Angeles')

        # One UTC day of hourly stamps: 2013-11-01 00:00 .. 2013-11-02 00:00.
        start = datetime(year=2013,
                         month=11,
                         day=1,
                         hour=0,
                         minute=0,
                         tzinfo=pytz.utc)
        # 1 day later
        end = datetime(year=2013,
                       month=11,
                       day=2,
                       hour=0,
                       minute=0,
                       tzinfo=pytz.utc)

        index = pd.date_range(start, end, freq='H')

        series = Series(1, index=index)
        series = series.tz_convert(local_timezone)
        result = series.resample('D', kind='period').mean()

        # Create the expected series
        # Index is moved back a day with the timezone conversion from UTC to
        # Pacific
        expected_index = (pd.period_range(start=start, end=end, freq='D') -
                          offsets.Day())
        expected = Series(1, index=expected_index)
        assert_series_equal(result, expected)
Esempio n. 6
0
def regress_em(df):
    """Fit a battery of linear models and return out-of-sample predictions.

    Trains OLS plus Ridge/Lasso at several regularization strengths on two
    look-back windows -- the preceding quarter and the preceding year -- and
    predicts over a fixed horizon (2017-03-18 .. 2017-04-22).

    Parameters
    ----------
    df : pandas.DataFrame
        Date-sliceable frame with a "visitors_nan" target column, consumed
        via the project helpers `take_df_by_period` / `do_regression`.

    Returns
    -------
    list
        Empty when there is nothing to predict or train on; otherwise a
        single-element list holding a DataFrame of per-model predictions
        ("q_regress_*" for the quarter window, "y_regress_*" for the year
        window), indexed like the prediction rows.
    """
    ret_list = []
    # BUG FIX: the format string was previously passed as
    # infer_datetime_format (a boolean flag), where it merely acted as a
    # truthy value; pass it as the explicit parse format instead.
    predict_start = pd.to_datetime("2017-03-18", format="%Y-%m-%d")
    predict_end = pd.to_datetime("2017-04-22", format="%Y-%m-%d")
    train_end = predict_start - offsets.Day(1)
    quarter_start = train_end - offsets.Week(13)
    year_start = train_end - offsets.Week(52)

    predict_df = take_df_by_period(df, predict_start, predict_end)
    # Drop rows with missing targets from both training windows.
    quarter_df = take_df_by_period(df, quarter_start, train_end).dropna(
        axis=0, subset=["visitors_nan"])
    year_df = take_df_by_period(df, year_start, train_end).dropna(
        axis=0, subset=["visitors_nan"])
    if predict_df.empty or quarter_df.empty:
        return ret_list

    # One OLS baseline plus Ridge/Lasso at increasing regularization.
    models = [
        linear_model.LinearRegression(),
        linear_model.Ridge(alpha=0.1),
        linear_model.Ridge(alpha=0.5),
        linear_model.Ridge(alpha=1.0),
        linear_model.Lasso(alpha=0.1),
        linear_model.Lasso(alpha=0.5),
        linear_model.Lasso(alpha=1.0),
    ]
    name_list = ["li1", "r1", "r2", "r3", "l1", "l2", "l3"]
    quarter_y_pred_list = do_regression(quarter_df, predict_df, models)
    year_y_pred_list = do_regression(year_df, predict_df, models)
    temp_df = pd.DataFrame(index=predict_df.index)
    # Pair each model name with its quarter/year predictions.
    for name, q_pred, y_pred in zip(name_list, quarter_y_pred_list,
                                    year_y_pred_list):
        temp_df["q_regress_" + name] = q_pred
        temp_df["y_regress_" + name] = y_pred
    ret_list.append(temp_df)
    return ret_list
Esempio n. 7
0
def range_datetime(datetime_start, datetime_end, timeskip=None):
    """Yield successive datetimes from datetime_start through datetime_end.

    Steps by *timeskip* (any timedelta/offset-like object); defaults to
    one calendar day. Both endpoints are inclusive when reachable.
    """
    step = offsets.Day(1) if timeskip is None else timeskip
    current = datetime_start
    while current <= datetime_end:
        yield current
        current = current + step
    def test_valid(self):
        """Argument validation for offset-based rolling windows."""

        df = self.regular

        # not a valid freq
        msg = "passed window foobar is not compatible with a datetimelike index"
        with pytest.raises(ValueError, match=msg):
            df.rolling(window="foobar")
        # not a datetimelike index
        msg = "window must be an integer"
        with pytest.raises(ValueError, match=msg):
            df.reset_index().rolling(window="foobar")

        # non-fixed freqs
        msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
        for freq in ["2MS", offsets.MonthBegin(2)]:
            with pytest.raises(ValueError, match=msg):
                df.rolling(window=freq)

        # fixed freqs are accepted and must not raise
        for freq in ["1D", offsets.Day(2), "2ms"]:
            df.rolling(window=freq)

        # non-integer min_periods
        msg = (r"local variable 'minp' referenced before assignment|"
               "min_periods must be an integer")
        for minp in [1.0, "foo", np.array([1, 2, 3])]:
            with pytest.raises(ValueError, match=msg):
                df.rolling(window="1D", min_periods=minp)

        # center is not implemented
        msg = "center is not implemented for datetimelike and offset based windows"
        with pytest.raises(NotImplementedError, match=msg):
            df.rolling(window="1D", center=True)
Esempio n. 9
0
    def _construct_bt_dt_index(self):
        """Construct the backtest date index running from t0 to T.

        Takes the weights (trading) index and prepends the initialization
        date t0: either the last price date preceding the first weight
        date, or -- when prices start at/after the first weight date -- an
        artificial timestamp one frequency step before the first price
        date.

        Returns
        -------
        pandas.DatetimeIndex
            Sorted index starting at t0.

        Raises
        ------
        ValueError
            If ``self.frequency`` is not one of 'B', 'D', 'min', 'H', 'S'.
        """
        dt_t0_tmp = self.price_date_index.copy()
        # First date for which weights are available.
        first_weight_date = self.trading_dt_index[0]

        if dt_t0_tmp[0] < first_weight_date:
            # Prices start before the first weight date: t0 is the price
            # date immediately preceding the first weight date.
            initialization_date_index = dt_t0_tmp.get_loc(first_weight_date) - 1
            dates_t0_index = dt_t0_tmp[initialization_date_index:]
        else:
            # No earlier price available: synthesize t0 one step before the
            # first price date, matching the price index frequency.
            freq = self.frequency
            if freq == 'B':
                step = time_offset.BDay(1)
            elif freq == 'D':
                step = time_offset.Day(1)
            elif freq == 'min':
                step = time_offset.Minute(1)
            elif freq == 'H':
                step = time_offset.Hour(1)
            elif freq == 'S':
                step = time_offset.Second(1)
            else:
                # BUG FIX: this branch used to drop into pdb (a leftover
                # debugger trap) and then assert; fail loudly instead.
                raise ValueError('unsupported frequency: %r' % (freq,))
            initialization_date = dt_t0_tmp[0] - step
            # Prepend the artificial first datetime and keep things sorted.
            dates_t0_index = dt_t0_tmp.append(
                pd.DatetimeIndex([initialization_date])).sort_values()
        return dates_t0_index
Esempio n. 10
0
    def test_valid(self):
        """Argument validation for offset-based rolling windows."""

        df = self.regular

        # not a valid freq
        with pytest.raises(ValueError):
            df.rolling(window="foobar")

        # not a datetimelike index
        with pytest.raises(ValueError):
            df.reset_index().rolling(window="foobar")

        # non-fixed freqs
        for freq in ["2MS", offsets.MonthBegin(2)]:
            with pytest.raises(ValueError):
                df.rolling(window=freq)

        # fixed freqs are accepted and must not raise
        for freq in ["1D", offsets.Day(2), "2ms"]:
            df.rolling(window=freq)

        # non-integer min_periods
        for minp in [1.0, "foo", np.array([1, 2, 3])]:
            with pytest.raises(ValueError):
                df.rolling(window="1D", min_periods=minp)

        # center is not implemented
        with pytest.raises(NotImplementedError):
            df.rolling(window="1D", center=True)
Esempio n. 11
0
    def test_overflow_offset_raises(self):
        """Timestamp +/- an offset beyond the int64-ns range must raise.

        xref https://github.com/statsmodels/statsmodels/issues/3374
        -- ends up multiplying really large numbers which overflow
        """
        stamp = Timestamp("2017-01-13 00:00:00", freq="D")
        offset_overflow = 20169940 * offsets.Day(1)
        msg = ("the add operation between "
               r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
               "will overflow")

        # Addition is commutative, so check both operand orders.
        with pytest.raises(OverflowError, match=msg):
            stamp + offset_overflow

        with pytest.raises(OverflowError, match=msg):
            offset_overflow + stamp

        with pytest.raises(OverflowError, match=msg):
            stamp - offset_overflow

        # xref https://github.com/pandas-dev/pandas/issues/14080
        # used to crash, so check for proper overflow exception

        stamp = Timestamp("2000/1/1")
        offset_overflow = to_offset("D") * 100**25

        with pytest.raises(OverflowError, match=msg):
            stamp + offset_overflow

        with pytest.raises(OverflowError, match=msg):
            offset_overflow + stamp

        with pytest.raises(OverflowError, match=msg):
            stamp - offset_overflow
Esempio n. 12
0
    def get_dates_range(self,
                        scale='auto',
                        start=None,
                        end=None,
                        date_max='2010-01-01'):
        '''
        Returns a list of dates sampled according to the specified parameters.

        :param scale: {'auto', 'maximum', 'daily', 'weekly', 'monthly',
            'quarterly', 'yearly'}
            Scale specifies the sampling intervals.
            'auto' will heuristically choose a scale for quick processing
        :param start: First date that will be included.
        :param end: Last date that will be included
        :param date_max: Fallback start date used when none can be derived.
        '''
        if scale not in [
                'auto', 'maximum', 'daily', 'weekly', 'monthly', 'quarterly',
                'yearly'
        ]:
            raise ValueError('Incorrect scale: %s' % scale)
        start = Timestamp(start or self._start.min() or date_max)
        # NaT is not equal to itself, so detect it via repr and substitute
        # the fallback date.
        start = Timestamp(date_max) if repr(start) == 'NaT' else start
        end = Timestamp(end
                        or max(Timestamp(self._end.max()), self._start.max()))
        end = datetime.utcnow() if repr(end) == 'NaT' else end
        # Clamp both endpoints to the configured bounds.
        start = start if self.check_in_bounds(start) else self._lbound
        end = end if self.check_in_bounds(end) else self._rbound

        if scale == 'auto':
            scale = self._auto_select_scale(start, end)
        if scale == 'maximum':
            # Use every distinct start/end timestamp within [start, end].
            start_dts = list(self._start.dropna().values)
            end_dts = list(self._end.dropna().values)
            dts = map(Timestamp, set(start_dts + end_dts))
            # BUG FIX: materialize the result -- on Python 3 filter() is a
            # lazy iterator, not the list this method is documented to
            # return.
            return [ts for ts in dts
                    if self.check_in_bounds(ts) and start <= ts <= end]

        freq = dict(daily='D',
                    weekly='W',
                    monthly='M',
                    quarterly='3M',
                    yearly='12M')
        offset = dict(daily=off.Day(n=0),
                      weekly=off.Week(),
                      monthly=off.MonthEnd(),
                      quarterly=off.QuarterEnd(),
                      yearly=off.YearEnd())
        # for some reason, weekly date range gives one week less:
        end_ = end + off.Week() if scale == 'weekly' else end
        ret = list(pd.date_range(start + offset[scale], end_,
                                 freq=freq[scale]))
        ret = [dt for dt in ret if dt <= end]
        ret = [start] + ret if ret and start < ret[0] else ret
        ret = ret + [end] if ret and end > ret[-1] else ret
        # BUG FIX: return a real list rather than a lazy filter object.
        return [ts for ts in ret if self.check_in_bounds(ts)]
Esempio n. 13
0
def test_get_rule_month():
    """_get_rule_month maps rule strings/offsets to their anchor month."""
    result = frequencies._get_rule_month('W')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.Week())
    assert (result == 'DEC')

    result = frequencies._get_rule_month('D')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.Day())
    assert (result == 'DEC')

    result = frequencies._get_rule_month('Q')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.QuarterEnd(startingMonth=12))
    # BUG FIX: this check was a print() and therefore could never fail.
    assert (result == 'DEC')

    result = frequencies._get_rule_month('Q-JAN')
    assert (result == 'JAN')
    result = frequencies._get_rule_month(offsets.QuarterEnd(startingMonth=1))
    assert (result == 'JAN')

    result = frequencies._get_rule_month('A-DEC')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.YearEnd())
    assert (result == 'DEC')

    result = frequencies._get_rule_month('A-MAY')
    assert (result == 'MAY')
    result = frequencies._get_rule_month(offsets.YearEnd(month=5))
    assert (result == 'MAY')
Esempio n. 14
0
    def test_with_local_timezone_dateutil(self):
        """Resampling a tz-converted series to 'period' labels in local time.

        see gh-5430 (dateutil timezone variant of the pytz test).
        """
        local_timezone = "dateutil/America/Los_Angeles"

        # One UTC day of hourly stamps: 2013-11-01 00:00 .. 2013-11-02 00:00.
        start = datetime(year=2013,
                         month=11,
                         day=1,
                         hour=0,
                         minute=0,
                         tzinfo=dateutil.tz.tzutc())
        # 1 day later
        end = datetime(year=2013,
                       month=11,
                       day=2,
                       hour=0,
                       minute=0,
                       tzinfo=dateutil.tz.tzutc())

        index = pd.date_range(start, end, freq="H", name="idx")

        series = Series(1, index=index)
        series = series.tz_convert(local_timezone)
        result = series.resample("D", kind="period").mean()

        # Create the expected series
        # Index is moved back a day with the timezone conversion from UTC to
        # Pacific
        expected_index = (
            pd.period_range(start=start, end=end, freq="D", name="idx") -
            offsets.Day())
        expected = Series(1, index=expected_index)
        assert_series_equal(result, expected)
Esempio n. 15
0
    def next_day(self):
        """Advance the simulation clock one day and refresh the spot price.

        Sets ``self.stock_price_t`` to that day's closing price from
        ``self.hist``, or to the string 'None' when no row exists for the
        new date.
        """
        self.t = offsets.Day(1).apply(self.t)
        # Closing prices whose index matches the (new) current date.
        price = list(self.hist['Close'].loc[self.hist.index.values ==
                                            np.datetime64(self.t)])

        if len(price) > 0:
            self.stock_price_t = price[0]
        else:
            # NOTE(review): sentinel is the *string* 'None', not None --
            # downstream code may compare against it; confirm before changing.
            self.stock_price_t = 'None'
Esempio n. 16
0
def test_delta_to_tick():
    """delta_to_tick maps timedeltas onto the appropriate Tick subclass."""
    # A whole number of days becomes a Day tick.
    assert delta_to_tick(timedelta(3)) == offsets.Day(3)

    # A nanosecond-resolution Timedelta becomes a Nano tick.
    assert delta_to_tick(Timedelta(nanoseconds=5)) == Nano(5)
Esempio n. 17
0
    def test_overflow_offset(self):
        """Timestamp +/- a huge Day offset must raise OverflowError.

        xref https://github.com/statsmodels/statsmodels/issues/3374 -- the
        operation ends up multiplying very large numbers which overflow.
        """
        stamp = Timestamp('2017-01-13 00:00:00', freq='D')
        offset = 20169940 * offsets.Day(1)

        # Every combination of the two operands must overflow.
        combos = [lambda: stamp + offset,
                  lambda: offset + stamp,
                  lambda: stamp - offset]
        for combo in combos:
            with pytest.raises(OverflowError):
                combo()
Esempio n. 18
0
    def test_roll_date_object(self):
        """rollback/rollforward accept plain date objects, not only datetimes."""
        offset = self._offset()

        dt = date(2012, 9, 15)

        # Off-offset date rolls back to the previous on-offset datetime ...
        result = offset.rollback(dt)
        assert result == datetime(2012, 9, 14)

        # ... and forward to the next one.
        result = offset.rollforward(dt)
        assert result == datetime(2012, 9, 17)

        # A Day offset treats every date as on-offset: both rolls are no-ops.
        offset = offsets.Day()
        result = offset.rollback(dt)
        assert result == datetime(2012, 9, 15)

        result = offset.rollforward(dt)
        assert result == datetime(2012, 9, 15)
Esempio n. 19
0
    def test_roll_date_object(self):
        """rollback/rollforward on a plain date snap to custom month ends."""
        anchor = date(2012, 9, 15)

        cb_month_end = CBMonthEnd()
        # Mid-September rolls back to the previous business month end ...
        assert cb_month_end.rollback(anchor) == datetime(2012, 8, 31)
        # ... and forward to the next one.
        assert cb_month_end.rollforward(anchor) == datetime(2012, 9, 28)

        # A Day offset treats any date as on-offset: both rolls are no-ops.
        day = offsets.Day()
        assert day.rollback(anchor) == datetime(2012, 9, 15)
        assert day.rollforward(anchor) == datetime(2012, 9, 15)
Esempio n. 20
0
    def test_roll_date_object(self):
        """rollback/rollforward on a plain date snap to custom month begins."""
        anchor = date(2012, 9, 15)

        cb_month_begin = CBMonthBegin()
        # Mid-September rolls back to the month's first business day ...
        assert cb_month_begin.rollback(anchor) == datetime(2012, 9, 3)
        # ... and forward to the next month's first business day.
        assert cb_month_begin.rollforward(anchor) == datetime(2012, 10, 1)

        # A Day offset treats any date as on-offset: both rolls are no-ops.
        day = offsets.Day()
        assert day.rollback(anchor) == datetime(2012, 9, 15)
        assert day.rollforward(anchor) == datetime(2012, 9, 15)
def data_move_test():
  """Demonstrate Series.shift and pandas date-offset arithmetic."""
  # Six months of random data on a month-end index.
  s = pd.Series(np.random.randn(6), index=pd.date_range('1/1/2019', periods=6, freq='M'))
  print('原数据 \r\n', s)
  # Plain shifts move the *data* (introducing missing values), not the index.
  print('数据往后移动 \r\n', s.shift(2))
  print('数据往前移动 \r\n', s.shift(-2))
  # With freq=, shift moves the timestamps instead of the data.
  print('后移动  freg参数,根据频率移动,实际对时间戳进行位移而不是对数据进行位移 \r\n', s.shift(2, freq='M'))
  print('前移动 freg参数\r\n', s.shift(-2, freq='D'))

  # Offset arithmetic on a plain datetime.
  now = datetime.today()
  print('datetim 今天:\r\n', now)
  print('datetim 偏移 3天\r\n', now + 3 * offset.Day())
  print('datetim 偏移 到本月底\r\n', now + offset.MonthEnd())
  print('datetim期偏移 第2月后的月底\r\n', now + offset.MonthEnd(2))

  # rollforward/rollback snap a timestamp onto the offset boundary.
  print('rollforward 向前滚到当月底 \r\n', offset.MonthEnd().rollforward(now))
  print('rollforward 向后滚到上月底\r\n', offset.MonthEnd().rollback(now))
  print('Series的时间戳 向前滚到月底\r\n', s.groupby(offset.MonthEnd().rollforward).count())
Esempio n. 22
0
def test_range_datetime():
    """range_datetime yields inclusive daily steps by default and honors timeskip."""
    first = datetime.datetime(2019, 1, 15)
    last = datetime.datetime(2019, 1, 20)
    # Expected values: every day, then every other day, both inclusive.
    expected_daily = [datetime.datetime(2019, 1, day) for day in range(15, 21)]
    expected_every_other = [
        datetime.datetime(2019, 1, day) for day in (15, 17, 19)
    ]

    assert expected_daily == list(utils.range_datetime(first, last))
    assert expected_every_other == list(
        utils.range_datetime(first, last, offsets.Day(2)))
Esempio n. 23
0
    def test_to_offset_pd_timedelta(self):
        """to_offset converts a Timedelta to the coarsest equivalent offset.

        Tests for #9064.
        """
        # (input Timedelta, expected offset) pairs.
        cases = [
            (Timedelta(days=1, seconds=1), offsets.Second(86401)),
            (Timedelta(days=-1, seconds=1), offsets.Second(-86399)),
            (Timedelta(hours=1, minutes=10), offsets.Minute(70)),
            (Timedelta(hours=1, minutes=-10), offsets.Minute(50)),
            (Timedelta(weeks=1), offsets.Day(7)),
            (Timedelta(microseconds=1), offsets.Micro(1)),
        ]
        for td, expected in cases:
            assert (expected == frequencies.to_offset(td))

        # Equivalent inputs yield equal offsets.
        assert (frequencies.to_offset(Timedelta(hours=1)) ==
                frequencies.to_offset('60min'))

        # A zero timedelta cannot be represented as an offset.
        pytest.raises(ValueError,
                      lambda: frequencies.to_offset(Timedelta(microseconds=0)))
Esempio n. 24
0
        # Frequency string.
        ("A", (get_freq("A"), 1)),
        ("3D", (get_freq("D"), 3)),
        ("-2M", (get_freq("M"), -2)),

        # Tuple.
        (("D", 1), (get_freq("D"), 1)),
        (("A", 3), (get_freq("A"), 3)),
        (("M", -2), (get_freq("M"), -2)),
        ((5, "T"), (FreqGroup.FR_MIN, 5)),

        # Numeric Tuple.
        ((1000, 1), (1000, 1)),

        # Offsets.
        (offsets.Day(), (get_freq("D"), 1)),
        (offsets.Day(3), (get_freq("D"), 3)),
        (offsets.Day(-2), (get_freq("D"), -2)),
        (offsets.MonthEnd(), (get_freq("M"), 1)),
        (offsets.MonthEnd(3), (get_freq("M"), 3)),
        (offsets.MonthEnd(-2), (get_freq("M"), -2)),
        (offsets.Week(), (get_freq("W"), 1)),
        (offsets.Week(3), (get_freq("W"), 3)),
        (offsets.Week(-2), (get_freq("W"), -2)),
        (offsets.Hour(), (FreqGroup.FR_HR, 1)),

        # Monday is weekday=0.
        (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)),
        (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)),
        (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)),
    ])
Esempio n. 25
0
 def test_construct_timestamp_preserve_original_frequency(self):
     # GH 22311: re-wrapping a Timestamp in Timestamp() must keep its freq.
     result = Timestamp(Timestamp("2010-08-08", freq="D")).freq
     expected = offsets.Day()
     assert result == expected
Esempio n. 26
0
import lineNotify
import annualData as ad
import pandas.tseries.offsets as offsets

# Configure logging from file.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger()

# Load tomorrow's lesson changes for class 5E.
data_5e_tomorrow = pd.read_csv('tomorrow.csv')

# Fetch the annual event schedule.
annual_data = ad.get_data()

# Tomorrow's date, normalized to midnight.
# BUG FIX: pd.datetime was deprecated and removed (pandas >= 2.0);
# use pd.Timestamp directly instead.
tomorrow = (pd.Timestamp.today() + offsets.Day()).normalize()

# Events scheduled for tomorrow.
annual_tomorrow = ad.search_for_date(annual_data, tomorrow)

# Build the notification messages.
msgs = []
for index, d in data_5e_tomorrow.iterrows():
    msgs.append(lessonData.create_tweet(d))
# print(msgs)

for index, d in annual_tomorrow.iterrows():
    msgs.append(ad.create_tweet(d))

# Post to LINE Notify.
ln = lineNotify.LineNotify()
Esempio n. 27
0
# Fill missing split rates with the neutral factor 1.
price['s_rate'] = price['s_rate'].fillna(1)
price['a_rate'] = 1.0

# Yahoo's exact adjustment formula is unknown; some tickers show small
# discrepancies that do not appear to be a pure rounding-precision issue.
# Walk backwards, accumulating the adjustment rate from the split rates.
a_rate_col = price.columns.get_loc('a_rate')
s_rate_col = price.columns.get_loc('s_rate')
for i in reversed(range(len(price) - 1)):
    # BUG FIX: the original chained assignment price['a_rate'][i] = ...
    # writes through an intermediate object (SettingWithCopy) and is not
    # guaranteed to update the frame; use positional .iat instead.
    price.iat[i, a_rate_col] = (price.iat[i + 1, a_rate_col] /
                                price.iat[i + 1, s_rate_col])

price['CalcClose'] = np.round(price['Close'] * price['a_rate'], 2)

# In[ ]:

# Show the window around each split date.
for date in info.index:
    print(date.date())
    display(price[date + offsets.Day(-10):date + offsets.Day(10)])

# In[ ]:

price

# In[ ]:

price.to_csv('calc_1491.csv')

# In[ ]:

price['2014-07-01':'2014-07-30']

# In[ ]:
import matplotlib.dates as mdates
import pandas as pd
import pandas.tseries.offsets as offsets
import statsmodels.api as sm

# Command-line arguments: input CSV, column to aggregate, model spec,
# output image path, and an optional seasonal period.
data_file = sys.argv[1]
item_name = sys.argv[2]
# BUG FIX (naming): was assigned to `type`, shadowing the builtin.
model_type = sys.argv[3]
dest_file = sys.argv[4]
season = int(sys.argv[5]) if len(sys.argv) > 5 else None

df = pd.read_csv(data_file, parse_dates=['lastdate'])

# Daily total of the requested item.
ds = df.groupby(['lastdate'])[item_name].sum().astype('float')

# Fit an unobserved-components (structural) time-series model.
r = sm.tsa.UnobservedComponents(ds, model_type, seasonal=season).fit()

print(r.summary())

fig, ax = plt.subplots()

plt.plot(ds)

start_date = max(ds.index)

# Forecast roughly a year past the last observation.
plt.plot(r.predict(start_date, start_date + offsets.Day(350)))

ax.xaxis.set_major_locator(mdates.YearLocator())

plt.savefig(dest_file)
Esempio n. 29
0
def test_delta_to_tick():
    """A timedelta of whole days converts to the equivalent Day tick."""
    three_days = timedelta(3)
    assert offsets._delta_to_tick(three_days) == offsets.Day(3)
def close_fluxnet(insitu_df, offset=15):
    """Close the surface energy balance following the FLUXNET2015 method.

    Applies EBC_CF Method 1: for every timestamp, closure ratios
    (Rn - G) / (H + LE) are computed over a moving window of +/- `offset`
    days (default 15), restricted to night (hour 23 and 0-2) and midday
    (hours 10-14) observations. Ratios outside 1.5 x IQR of the window
    are discarded; the 25th/50th/75th percentile ratios are stored.
    Changing the window emulates EBC_CF Method 2 / EBC_CF Method 3.
    See: https://fluxnet.fluxdata.org/data/fluxnet2015-dataset/data-processing/

    Parameters
    ----------
    insitu_df : pandas.DataFrame
        Time-indexed frame with columns 'insitu_Rn' (net radiation, W/m2),
        'insitu_GHF' (ground heat flux, W/m2), 'insitu_LE' (latent heat,
        W/m2), 'insitu_SHF' (sensible heat, W/m2) and 'insitu_LE_raw'.
    offset : int, default 15
        Half-width of the moving window, in days.

    Returns
    -------
    pandas.DataFrame
        `insitu_df` with closure-ratio percentile columns, corrected
        LE/SHF flux columns and an 'ebc_1_flag' data-sufficiency flag.
        If closure fails (e.g. missing radiation observations), artificial
        closure factors of 1.1/1.3/1.5 are applied to 'insitu_LE_raw'
        instead.
    """
    try:
        flag_day_lim = []
        insitu_cr_25 = []
        insitu_cr_50 = []
        insitu_cr_75 = []

        delta = offsets.Day(offset)

        for t0 in insitu_df.index:
            # Moving window of +/- `offset` days around t0.
            ss_1 = insitu_df[t0 - delta:t0 + delta]
            hour = ss_1.index.hour
            # Keep late-night (23), early-morning (0-2) and midday (10-14)
            # observations only.
            selector = (22 < hour) | (hour < 3) | ((10 <= hour) & (hour < 15))
            ss_2 = ss_1[selector]
            if (ss_2['insitu_GHF'].isna().sum() / len(ss_2.index)) > 0.2:
                # Too much missing ground heat flux: drop G from the ratio.
                cr_ss2 = ss_2.insitu_Rn / (ss_2.insitu_SHF + ss_2.insitu_LE)
            else:
                cr_ss2 = (ss_2.insitu_Rn -
                          ss_2.insitu_GHF) / (ss_2.insitu_SHF + ss_2.insitu_LE)
            q1, q3 = np.percentile(sorted(cr_ss2), [25, 75])
            iqr = q3 - q1
            # Closure-quality thresholds: 1.5 x IQR fences.
            lower_bound = q1 - (1.5 * iqr)
            upper_bound = q3 + (1.5 * iqr)
            # Discard ratios outside the fences.
            cr_ss2[cr_ss2 > upper_bound] = np.nan
            cr_ss2[cr_ss2 < lower_bound] = np.nan
            cr_ss2 = cr_ss2[~np.isnan(cr_ss2)]
            ds_sort2 = sorted(cr_ss2)
            # Window closure ratios at the 25th/50th/75th percentiles.
            cr_q1, cr_q3 = np.percentile(ds_sort2, [25, 75])
            cr_med = np.percentile(ds_sort2, [50])[0]
            insitu_cr_25.append(cr_q1)
            insitu_cr_50.append(cr_med)
            insitu_cr_75.append(cr_q3)
            # Fewer than 100 retained samples ~ less than 5 days of data.
            flag_day_lim.append(len(cr_ss2) < 100)

        insitu_df['insitu_cr25'] = np.array(insitu_cr_25)
        insitu_df['insitu_cr50'] = np.array(insitu_cr_50)
        insitu_df['insitu_cr75'] = np.array(insitu_cr_75)

        # Extra sanity thresholds on top of the FLUXNET2015 processing.
        EBC_lower_thresh = 0.5
        EBC_upper_thresh = 1.5
        # BUG FIX: the original used the Python `or` operator between two
        # boolean Series, which raises "truth value ... is ambiguous" and
        # silently diverted every call into the except fallback below;
        # element-wise | (with parentheses) is required here.
        insitu_df.loc[(insitu_df['insitu_cr50'] > EBC_upper_thresh) |
                      (insitu_df['insitu_cr50'] < EBC_lower_thresh),
                      'insitu_cr50'] = np.nan

        # Where the median ratio was rejected, drop the quartiles as well.
        insitu_df.loc[np.isnan(insitu_df.insitu_cr50), 'insitu_cr25'] = np.nan
        insitu_df.loc[np.isnan(insitu_df.insitu_cr50), 'insitu_cr75'] = np.nan

        # Correct LE and SHF observations with the closure ratios and
        # filter out unrealistic (negative) fluxes.
        insitu_df[
            'insitu_LE_flux50'] = insitu_df.insitu_LE_raw * insitu_df.insitu_cr50
        insitu_df.loc[insitu_df['insitu_LE_flux50'] < 0,
                      'insitu_LE_flux50'] = np.nan
        insitu_df[
            'insitu_LE_flux25'] = insitu_df.insitu_LE_raw * insitu_df.insitu_cr25
        insitu_df.loc[insitu_df['insitu_LE_flux50'] < 0,
                      'insitu_LE_flux25'] = np.nan
        insitu_df[
            'insitu_LE_flux75'] = insitu_df.insitu_LE_raw * insitu_df.insitu_cr75
        insitu_df.loc[insitu_df['insitu_LE_flux50'] < 0,
                      'insitu_LE_flux75'] = np.nan

        insitu_df[
            'insitu_SHF_flux50'] = insitu_df.insitu_SHF * insitu_df.insitu_cr50
        # NOTE(review): the target column 'insitu_SHF_flux_fc' differs from
        # 'insitu_SHF_flux50' set just above -- possibly a typo; kept as-is
        # to preserve behavior, confirm intent.
        insitu_df.loc[insitu_df['insitu_SHF_flux50'] < 0,
                      'insitu_SHF_flux_fc'] = np.nan
        insitu_df[
            'insitu_H_flux25'] = insitu_df.insitu_SHF * insitu_df.insitu_cr25
        insitu_df.loc[insitu_df['insitu_SHF_flux50'] < 0,
                      'insitu_H_flux25'] = np.nan
        insitu_df[
            'insitu_H_flux75'] = insitu_df.insitu_SHF * insitu_df.insitu_cr75
        insitu_df.loc[insitu_df['insitu_SHF_flux50'] < 0,
                      'insitu_H_flux75'] = np.nan

        # Flag timestamps backed by fewer than ~5 days of data.
        insitu_df['ebc_1_flag'] = flag_day_lim

    except Exception:
        # Closure failed (e.g. Rn or SHF observations unavailable): fall
        # back to a range of artificial closure factors on the raw LE.
        insitu_df['insitu_LE_1.1'] = insitu_df['insitu_LE_raw'] * 1.1
        insitu_df['insitu_LE_1.3'] = insitu_df['insitu_LE_raw'] * 1.3
        insitu_df['insitu_LE_1.5'] = insitu_df['insitu_LE_raw'] * 1.5

    return insitu_df