コード例 #1
0
class TestTimedeltaIndexOps:
    def test_value_counts_unique(self):
        """Check ``value_counts`` and ``unique`` with repeats and NaT."""
        # GH 7735
        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # The element at position n is repeated n + 1 times.
        repeat_counts = range(1, len(hourly) + 1)
        idx = TimedeltaIndex(np.repeat(hourly.values, repeat_counts))

        # Counts run from 10 down to 1, so the expected index is the
        # hourly range walked backwards (without a freq).
        descending = timedelta_range("1 days 18:00:00",
                                     freq="-1H",
                                     periods=10)
        expected = Series(range(10, 0, -1),
                          index=descending._with_freq(None),
                          dtype="int64")
        for obj in (idx, Series(idx)):
            tm.assert_series_equal(obj.value_counts(), expected)

        tm.assert_index_equal(
            idx.unique(),
            timedelta_range("1 days 09:00:00", freq="H", periods=10),
        )

        nine = "1 days 09:00:00"
        eight = "1 days 08:00:00"
        idx = TimedeltaIndex([nine] * 3 + [eight] * 2 + [pd.NaT])

        # By default the NaT entry is not counted.
        counts_without_nat = Series([3, 2],
                                    index=TimedeltaIndex([nine, eight]))
        for obj in (idx, Series(idx)):
            tm.assert_series_equal(obj.value_counts(), counts_without_nat)

        # With dropna=False the NaT entry gets its own row.
        uniques = TimedeltaIndex([nine, eight, pd.NaT])
        counts_with_nat = Series([3, 2, 1], index=uniques)
        for obj in (idx, Series(idx)):
            tm.assert_series_equal(obj.value_counts(dropna=False),
                                   counts_with_nat)

        tm.assert_index_equal(idx.unique(), uniques)

    def test_nonunique_contains(self):
        """The first element of an index with duplicates is ``in`` it."""
        # GH 9512
        value_sets = (
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ["00:01:00", "00:01:00", "00:02:00"],
            ["00:01:00", "00:01:00", "00:00:01"],
        )
        for values in value_sets:
            idx = TimedeltaIndex(values)
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Accessing a missing attribute on the Series raises AttributeError."""
        # see gh-9680
        index = pd.timedelta_range(start=0, periods=10, freq="1s")
        values = np.random.normal(size=10)
        ts = pd.Series(values, index=index)
        assert "foo" not in ts.__dict__
        with pytest.raises(
                AttributeError,
                match="'Series' object has no attribute 'foo'"):
            ts.foo

    def test_order(self):
        """Check ``sort_values`` results, ``freq`` handling and indexers.

        The first half uses already-sorted indexes that carry a ``freq``:
        an ascending sort keeps the freq, a descending sort yields the
        reversed index whose freq has ``n == -1``.

        The second half uses unsorted indexes containing a duplicate and
        no freq: the sorted result must have ``freq is None`` and the
        returned indexer must map back into the original positions.
        """
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"],
                              freq="D",
                              name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"],
                              freq="H",
                              name="idx")

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer,
                                        np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1")
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1")

        idx2 = TimedeltaIndex(["1 day", "3 day", "5 day", "2 day", "1 day"],
                              name="idx2")
        exp2 = TimedeltaIndex(["1 day", "1 day", "2 day", "3 day", "5 day"],
                              name="idx2")

        # idx1 and idx2 have the same relative ordering (1, 3, 5, 2, 1), so
        # the expected indexers below apply to both.  Previously idx2 was
        # built but never exercised: the loop ran (idx1, exp1) three times.
        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """``drop_duplicates`` keeps freq when unique, and leaves it None after append."""
        # GH 10115
        idx = pd.timedelta_range("1 day",
                                 periods=10,
                                 freq=freq_sample,
                                 name="idx")
        deduped = idx.drop_duplicates()
        tm.assert_index_equal(idx, deduped)
        assert idx.freq == deduped.freq

        doubled = idx.append(idx)
        assert doubled.freq is None  # freq is reset
        deduped = doubled.drop_duplicates()
        tm.assert_index_equal(idx._with_freq(None), deduped)
        assert deduped.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.array([False] * 10 + [True] * 5), np.arange(10)),
            ("last", np.array([True] * 5 + [False] * 10), np.arange(5, 15)),
            (
                False,
                np.array([True] * 5 + [False] * 5 + [True] * 5),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """``duplicated``/``drop_duplicates`` agree between Index and Series.

        ``expected`` is the boolean duplicate mask for the given ``keep``
        and ``index`` holds the positions that survive in the Series case.
        """
        # to check Index/Series compat
        base = pd.timedelta_range("1 day",
                                  periods=10,
                                  freq=freq_sample,
                                  name="idx")
        # Ten unique values followed by a copy of the first five.
        idx = base.append(base[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        survivors = idx[~expected]

        tm.assert_index_equal(idx.drop_duplicates(keep=keep), survivors)

        series_result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(series_result, Series(survivors, index=index))

    def test_infer_freq(self, freq_sample):
        """Rebuilding from ``asi8`` with ``freq="infer"`` recovers the freq."""
        # GH#11018
        original = pd.timedelta_range("1", freq=freq_sample, periods=10)
        inferred = pd.TimedeltaIndex(original.asi8, freq="infer")
        tm.assert_index_equal(original, inferred)
        assert inferred.freq == freq_sample

    def test_repeat(self):
        """``repeat`` (method and ``np.repeat``) duplicates values and drops freq."""
        daily = pd.timedelta_range("1 days", periods=2, freq="D")
        expected = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for result in (daily.repeat(2), np.repeat(daily, 2)):
            tm.assert_index_equal(result, expected)
            assert result.freq is None

        with_nat = TimedeltaIndex(["1 days", "NaT", "3 days"])
        expected = TimedeltaIndex(
            ["1 days"] * 3 + ["NaT"] * 3 + ["3 days"] * 3)
        for result in (with_nat.repeat(3), np.repeat(with_nat, 3)):
            tm.assert_index_equal(result, expected)
            assert result.freq is None

    def test_nat(self):
        """Check the NA-related attributes with and without a NaT entry."""
        assert pd.TimedeltaIndex._na_value is pd.NaT
        assert pd.TimedeltaIndex([])._na_value is pd.NaT

        # (values, expected _isnan mask, expected hasnans, expected _nan_idxs)
        cases = [
            (["1 days", "2 days"], [False, False], False, []),
            (["1 days", "NaT"], [False, True], True, [1]),
        ]
        for values, nan_mask, has_nans, nan_positions in cases:
            idx = pd.TimedeltaIndex(values)
            assert idx._can_hold_na

            tm.assert_numpy_array_equal(idx._isnan, np.array(nan_mask))
            assert idx.hasnans is has_nans
            tm.assert_numpy_array_equal(
                idx._nan_idxs, np.array(nan_positions, dtype=np.intp))

    def test_equals(self):
        """``equals`` across dtypes, containers and out-of-bounds objects."""
        # GH 13107
        idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"])
        idx_obj = idx.astype(object)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx_obj)
        assert idx_obj.equals(idx)
        assert idx_obj.equals(idx.astype(object))
        # A list or a Series is never "equal" to the index.
        for other in (list(idx), pd.Series(idx)):
            assert not idx.equals(other)

        # Same values in a different order.
        idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"])
        idx2_obj = idx2.astype(object)
        for other in (idx2, idx2.copy(), idx2_obj):
            assert not idx.equals(other)
        for other in (idx2, idx2_obj):
            assert not idx_obj.equals(other)
        for other in (list(idx2), pd.Series(idx2)):
            assert not idx.equals(other)

        # Check that we dont raise OverflowError on comparisons outside the
        #  implementation range
        oob = pd.Index([timedelta(days=10**6)] * 3, dtype=object)
        for index in (idx, idx2):
            assert not index.equals(oob)

        # FIXME: oob.apply(np.timedelta64) incorrectly overflows
        oob2 = pd.Index([np.timedelta64(x) for x in oob], dtype=object)
        for index in (idx, idx2):
            assert not index.equals(oob2)

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        """Setting ``freq`` on the backing array accepts strings, offsets, None."""
        # GH 20678
        tdi = TimedeltaIndex(values)

        # Whether given as a string or an offset, the stored freq is a
        # DateOffset that compares equal to what was assigned.
        tdi._data.freq = freq
        assert tdi.freq == freq
        assert isinstance(tdi.freq, DateOffset)

        # Assigning None clears it again.
        tdi._data.freq = None
        assert tdi.freq is None

    def test_freq_setter_errors(self):
        """Invalid ``freq`` assignments raise ValueError with a clear message."""
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        failing_cases = [
            # setting with an incompatible freq
            ("5D",
             ("Inferred frequency 2D from passed values does not conform to "
              "passed frequency 5D")),
            # setting with a non-fixed frequency
            ("2B", r"<2 \* BusinessDays> is a non-fixed frequency"),
            # setting with non-freq string
            ("foo", "Invalid frequency"),
        ]
        for bad_freq, msg in failing_cases:
            with pytest.raises(ValueError, match=msg):
                idx._data.freq = bad_freq

    def test_freq_view_safe(self):
        """Dropping freq on a second index must not touch the first one."""
        # Setting the freq for one TimedeltaIndex shouldn't alter the freq
        #  for another that views the same data

        original = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D")
        backing = original._data

        without_freq = TimedeltaIndex(backing)._with_freq(None)
        assert without_freq.freq is None

        # Original was not altered
        assert original.freq == "2D"
        assert backing.freq == "2D"
コード例 #2
0
ファイル: demo6.py プロジェクト: py19912214/python_demo
import datetime
from pandas.tseries.offsets import Day

# Current local time.
now_time = datetime.datetime.now()
# Yesterday's date formatted as YYYY-MM-DD.
yes_time = (now_time - Day(1)).strftime('%Y-%m-%d')
# Print the day-of-month part (the text after the last "-").
print(yes_time.rsplit("-", 1)[-1])
cur_time = now_time.strftime("%Y-%m-%d")
# Print whether today is the first day of the month.
print(cur_time.rsplit("-", 1)[-1] == '01')
コード例 #3
0
def is_bday(x):
    """Return whether *x* is a business day.

    Stepping one calendar day forward and then one business day back lands
    on *x* again only when *x* itself is a business day.
    """
    round_trip = x + Day(1) - BDay(1)
    return x == round_trip
コード例 #4
0
    def _generate_range(cls,
                        start,
                        end,
                        periods,
                        freq,
                        tz=None,
                        normalize=False,
                        ambiguous='raise',
                        closed=None):
        """
        Build a range-based index from three of start, end, periods, freq.

        Parameters
        ----------
        start, end : optional
            Range endpoints; each one that is given is wrapped in
            ``Timestamp``.
        periods : optional
            Number of entries to generate.
        freq : optional
            Step between entries, converted with ``to_offset``.  When it
            is None the values are spaced linearly between start and end.
        tz : optional
            Requested time zone; reconciled with the endpoints by
            ``_infer_tz_from_endpoints``.
        normalize : bool, default False
            Forwarded to ``_maybe_normalize_endpoints``.
        ambiguous : default 'raise'
            Forwarded to ``conversion.tz_localize_to_utc`` when a naive
            result has to be localized to ``tz``.
        closed : optional
            Checked by ``dtl.validate_endpoints``; decides whether a first
            entry equal to start / last entry equal to end is kept.

        Returns
        -------
        The object produced by ``cls._simple_new`` from the generated
        values, with ``freq`` and ``tz`` attached.

        Raises
        ------
        ValueError
            If not exactly three of start, end, periods and freq are
            given, or if closed is given while start and end are both
            None.
        """
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is None and end is None:
            if closed is not None:
                # The two literals are concatenated, so the first one needs
                # its trailing space ("start and", not "startand").
                raise ValueError("Closed has to be None if not both of start "
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        start, end, _normalized = _maybe_normalize_endpoints(
            start, end, normalize)

        tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)

        if hasattr(freq, 'delta') and freq != Day():
            # sub-Day Tick
            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is None:
                    start = start.tz_localize(tz, ambiguous=False)

                if end is not None and end.tz is None:
                    end = end.tz_localize(tz, ambiguous=False)

            if start and end:
                # Give a naive endpoint the time zone of the aware one.
                if start.tz is None and end.tz is not None:
                    start = start.tz_localize(end.tz, ambiguous=False)

                if end.tz is None and start.tz is not None:
                    end = end.tz_localize(start.tz, ambiguous=False)

            if cls._use_cached_range(freq, _normalized, start, end):
                index = cls._cached_range(start,
                                          end,
                                          periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end, periods, freq)

        else:
            # Day-or-coarser offsets (or no freq): generate in naive wall
            # time and localize the result afterwards.

            if tz is not None:
                # naive dates
                if start is not None and start.tz is not None:
                    start = start.replace(tzinfo=None)

                if end is not None and end.tz is not None:
                    end = end.replace(tzinfo=None)

            if start and end:
                # Strip the time zone from whichever endpoint still has one
                # so both are naive.
                if start.tz is None and end.tz is not None:
                    end = end.replace(tzinfo=None)

                if end.tz is None and start.tz is not None:
                    start = start.replace(tzinfo=None)

            if freq is not None:
                if cls._use_cached_range(freq, _normalized, start, end):
                    index = cls._cached_range(start,
                                              end,
                                              periods=periods,
                                              freq=freq)
                else:
                    index = _generate_regular_range(cls, start, end, periods,
                                                    freq)

                if tz is not None and getattr(index, 'tz', None) is None:
                    arr = conversion.tz_localize_to_utc(ensure_int64(
                        index.values),
                                                        tz,
                                                        ambiguous=ambiguous)

                    index = cls(arr)

                    # index is localized datetime64 array -> have to convert
                    # start/end as well to compare
                    if start is not None:
                        start = start.tz_localize(tz).asm8
                    if end is not None:
                        end = end.tz_localize(tz).asm8
            else:
                # Create a linearly spaced date_range in local time
                # (freq is None here, so start, end and periods were all
                # supplied by the three-of-four check above).
                start = start.tz_localize(tz)
                end = end.tz_localize(tz)
                arr = np.linspace(start.value, end.value, periods)
                index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)

        # Drop an endpoint that coincides with start/end on an open side.
        if not left_closed and len(index) and index[0] == start:
            index = index[1:]
        if not right_closed and len(index) and index[-1] == end:
            index = index[:-1]

        return cls._simple_new(index.values, freq=freq, tz=tz)
コード例 #5
0
def integrated_regression_model(isIntelligent=True,
                                sku_id='',
                                df=None,
                                forecast_model='GBRT',
                                evaluation='MAPE',
                                length_merge=2,
                                period=5,
                                retrain=True):
    '''
    Regression-based forecasting with a rolling, one-row-at-a-time prediction.

    The target that the model is fitted on is ``quantity`` minus the
    per-weekday mean quantity of the training rows; that mean is added back
    to every model output.

    --input parameters--
    isIntelligent: when it is True (and retrain is True) the freshly trained
        model is stored with save_forecast_model
    sku_id: id passed to save_forecast_model / load_forecast_model
    df: input data for forecasting -- DataFrame (training rows followed by
        the rows to predict); the last length_merge * period rows are
        treated as the rows to predict
    forecast_model: forecasting model, used as a key into regression_methods
    evaluation: evaluation method, used as a key into evaluation_methods
    length_merge: number of days merged together
    period: number of forecast periods
    retrain: if True a new model is fitted; otherwise a stored model is
        loaded, and a new one is fitted and saved only if loading returns None

    --returns--
    (y_real, y_fit, y_predict, error_fit): the training targets, the fitted
    values on the training rows, the predicted values (all as lists rounded
    to 2 decimals) and the rounded fitting error on the training rows.

    NOTE(review): this relies on DataFrame.ix, which newer pandas releases no
    longer provide -- confirm the pandas version this runs against.
    '''
    df_tmp = df.copy()
    predict_days = length_merge * period

    # (1) Data preprocessing
    df_tmp['week_num'] = df_tmp['date'].map(lambda x: x.weekday())
    index_train = df_tmp.index[:-predict_days]
    # Average quantity per weekday, computed on the training rows only
    df_mean = df_tmp.ix[index_train, ['quantity', 'week_num']]
    y_avg = df_mean.groupby('week_num').quantity.aggregate([np.mean
                                                            ])  # returns a DataFrame
    y_avg = y_avg.ix[:, 0]  # first column, equivalent to y_avg['mean']; returns a Series
    # Replace each week_num value by the average quantity of that weekday
    df_tmp['week_num'] = df_tmp['week_num'].map(lambda x: y_avg[x])
    # Add lag features for t-1, t-2 and t-7
    feature_lag = {
        'quantity': [1, 2, 7],
        'sale_price': [1],
        'purchase_price': [1],
        'weekend': [1]
    }
    df_tmp = add_lag_feature(df=df_tmp, feature_lag=feature_lag)
    # Keep both the original y Series and the per-weekday mean Series
    df_tmp = df_tmp.set_index(['date'])  # use the date column as index; still a DataFrame
    y_series_mean = df_tmp['week_num']  # Series of per-weekday mean y
    y_series_original = df_tmp['quantity']  # Series of original y
    # Drop the columns in place
    df_tmp.drop(['quantity', 'week_num'], axis=1, inplace=True)
    # One-hot encode the features (all string-typed features, per the
    # original author's note; code_dummies is defined elsewhere)
    df_tmp = code_dummies(df_tmp)
    # Extract the input data X
    X = df_tmp.values.astype(np.float32)
    # Normalize the data
    X = normalization(data_X=X)
    # Put the normalized data back into a DataFrame with index and headers
    X_df = pd.DataFrame(X, index=df_tmp.index, columns=df_tmp.columns)

    # (2) Model training and rolling prediction
    y_copy = y_series_original.copy()
    # Min/max of the training targets, used below to rescale the lag values.
    # NOTE(review): (y_max - y_min) is zero when the training quantities are
    # constant -- confirm that cannot happen.
    y_max = max(y_copy[:-predict_days])
    y_min = min(y_copy[:-predict_days])
    y_series_sub = y_series_original - y_series_mean
    # Feature names of the input data
    features = X_df.columns
    # Training part
    X_df_train = X_df[:-predict_days].copy()
    X = X_df_train.values
    y_train = y_series_sub[:-predict_days]
    # Prediction part
    X_df_test = X_df[-predict_days:].copy(
    )  # else-->A value is trying to be set on a copy of a slice from a DataFrame
    # List for prediction results (not used further in this function)
    prediction_y = []
    # Predict the samples one by one
    for k, index in enumerate(X_df_test.index):
        # The index holds dates, so the lags are looked up by date.  y_copy
        # is updated at the end of every iteration, so later rows see the
        # predictions made for earlier rows.
        t_1 = index - 1 * Day()
        t_2 = index - 2 * Day()
        t_7 = index - 7 * Day()
        X_df_test.ix[index,
                     'quantity_t_1'] = (y_copy[t_1] - y_min) / (y_max - y_min)
        X_df_test.ix[index,
                     'quantity_t_2'] = (y_copy[t_2] - y_min) / (y_max - y_min)
        X_df_test.ix[index,
                     'quantity_t_7'] = (y_copy[t_7] - y_min) / (y_max - y_min)
        # Feature selection
        if k == 0:  # feature selection, done only once
            selected_features = feature_selection(data_X=X, data_y=y_train)
            final_features = list(np.array(features)[selected_features])
            X_train = X_df_train.ix[:, selected_features].values  # X after feature selection
        X_test = X_df_test.ix[index, final_features].values  # a single row comes back as a vector
        X_test = np.array(X_test).reshape(1, len(X_test))
        # Model training
        if k == 0:  # model training, done only once
            if retrain:
                # Initialize the model
                model = regression_methods[forecast_model]()
                # Train the model
                model.fit(X_train, y_train)
                if isIntelligent is True:
                    save_forecast_model(id=sku_id,
                                        model_name=forecast_model,
                                        model=model)
            else:
                # Note: when reusing a stored model the input features must
                # not change, otherwise prediction will fail
                model = load_forecast_model(id=sku_id,
                                            model_name=forecast_model)
                if model is None:
                    print('[INFO]:sku_id({})的{}模型加载失败,重新训练'.format(
                        sku_id, forecast_model))
                    # Initialize the model
                    model = regression_methods[forecast_model]()
                    # Train the model
                    model.fit(X_train, y_train)
                    save_forecast_model(id=sku_id,
                                        model_name=forecast_model,
                                        model=model)
        predicted = model.predict(X_test) + array(
            y_series_mean[index])  # predict a single value
        y_copy[index] = predicted[0]
    # Predicted values
    y_predict = y_copy[-predict_days:]
    # Fitted values on the training rows
    y_fit = model.predict(X_train) + array(y_series_mean[:-predict_days])
    # Actual values
    y_real = y_series_original[:-predict_days]
    # Fitting error on the training data
    error_fit = evaluation_methods[evaluation](y_test=array(y_real),
                                               y_predict=array(y_fit))
    # Round to 2 decimal places
    y_real = list(np.round(y_real, 2))
    y_fit = list(np.round(y_fit, 2))
    y_predict = list(np.round(y_predict, 2))
    error_fit = np.round(error_fit, 2)

    return y_real, y_fit, y_predict, error_fit
コード例 #6
0
pd.date_range('1/1/2000', periods=5, freq='1h30min')

# Shifting (leading and lagging) data
ts = pd.Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts
ts.shift(2)
ts.shift(-2)
ts.shift(2, freq='M')
ts.shift(3, freq='D')
ts.shift(1, freq='3D')
ts.shift(1, freq='90T')

# Shifting dates with offsets
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now+3*Day()
now + MonthEnd()
now + MonthEnd(2)

offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)

ts = pd.Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d'))
ts.groupby(offset.rollforward).mean()
ts.resample('M').mean()

# Time zone information
# Looking up time zones
import pytz
pytz.common_timezones[-5:] # time zone names
コード例 #7
0
ファイル: test_views.py プロジェクト: jasonkholden/dtale
def build_ts_data(size=5, days=5):
    """Yield one record per (date, security) pair.

    Dates run daily from 2000-01-01 for ``days`` days; for each date the
    ids 0 .. size-1 are emitted, with ``foo`` and ``bar`` equal to the id.
    """
    first_day = pd.Timestamp('20000101')
    last_day = first_day + Day(days - 1)
    for current_day in pd.date_range(first_day, last_day):
        for sec_id in range(size):
            yield {
                'date': current_day,
                'security_id': sec_id,
                'foo': sec_id,
                'bar': sec_id,
            }
コード例 #8
0
# Absolute path of the directory above the one containing this file; it is
# appended to sys.path so that modules located there can be imported below.
parent_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(parent_path)

import pandas as pd
import datetime as dt
from pandas.tseries.offsets import Day
import zipfile, io
import traceback
from settings import operator
from bs4 import BeautifulSoup

# Module-level state; everything here is evaluated once, at import time.
op = operator.Operator()
# Absolute path of the "data" directory next to this file.
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
# Yesterday (now minus one Day offset) and its YYYYMMDD string form.  The
# string is the default ``datadate`` of get_bond_csi, so that default is
# fixed when the module is imported.
date_dt = dt.datetime.now() - Day(1)
date = date_dt.strftime('%Y%m%d')


def get_bond_csi(datadate=date):
    url = 'http://115.29.204.48/zqgz/%sbond_valuation.zip' % str(datadate)
    res = op.get_res(url, text=False)
    z = zipfile.ZipFile(io.BytesIO(res))
    for f_info in z.infolist():
        if 'txt' in f_info.filename:
            data_file = z.open(f_info)
    raw_data = [t.decode('utf-8') for t in data_file.readlines()[12:]]
    data_str = ''.join(raw_data)
    data_list = [t.replace(' ', '') for t in data_str.split('\r\n')]
    values = []
    for data_row in data_list:
コード例 #9
0
def create_data():
    """
    Create the pickle/msgpack data.

    Returns a dict of named sample objects grouped by kind: ``series``,
    ``frame``, ``index``, ``scalars``, ``mi``, ``sp_series``, ``sp_frame``,
    ``cat``, ``timestamp`` and ``offsets``.  Some entries are only added,
    or are constructed differently, depending on ``_loose_version``.
    """

    # Raw column data shared by several of the Series/DataFrame samples.
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    # One sample per index type.
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # RangeIndex is only imported and added for versions >= 0.18.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    # interval_range is only imported and added for versions >= 0.21.
    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    # A two-level MultiIndex built from parallel label lists.
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    # Series samples: plain dtypes, datetime and MultiIndex indexes,
    # duplicate labels, categorical, tz-aware datetimes and periods.
    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    # Frame with mixed dtypes whose column labels contain a duplicate 'A'.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    # DataFrame samples mirroring the Series samples above.
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    # Categoricals with 7, 1000 and 10000 categories; the key names suggest
    # these target different code widths -- TODO confirm.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # Versions before 0.19.2 are given the frequency through the ``offset``
    # keyword, later ones through ``freq``.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One sample per offset class, several with non-default arguments.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The sparse samples come from helper functions defined elsewhere.
    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
コード例 #10
0
# One day expressed as 24 times _ONE_HOUR (the unit is whatever _ONE_HOUR,
# defined outside this view, uses).
_ONE_DAY = 24 * _ONE_HOUR

# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions

#: cache of previously seen offsets
_offset_map = {}  # type: Dict[str, DateOffset]


def get_period_alias(offset_str):
    """
    Look up the closest period alias for an offset string (BQ->Q etc).

    Returns None when ``offset_str`` has no entry in
    ``_offset_to_period_map``.
    """
    alias = _offset_to_period_map.get(offset_str)
    return alias


# Maps a plural unit name to the offset object representing one such unit.
_name_to_offset_map = dict(
    days=Day(1),
    hours=Hour(1),
    minutes=Minute(1),
    seconds=Second(1),
    milliseconds=Milli(1),
    microseconds=Micro(1),
    nanoseconds=Nano(1),
)


def to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
コード例 #11
0
ファイル: utils.py プロジェクト: zhuyx0110/alphalens
def compute_forward_returns(factor_idx,
                            prices,
                            periods=(1, 5, 10),
                            filter_zscore=None):
    """
    Build the N-period forward returns (percent change) of every asset for
    each factor date.

    Parameters
    ----------
    factor_idx : pd.DatetimeIndex
        Dates for which forward returns are wanted. Only the dates that
        also appear in ``prices.index`` are used.
    prices : pd.DataFrame
        Pricing data, assets as columns and dates as index. It has to
        extend past the last factor date by more rows than the largest
        entry of ``periods``.
    periods : sequence[int]
        Forward windows, counted in rows of ``prices``.
    filter_zscore : int or float, optional
        When given, returns further than this many standard deviations
        from the per-asset mean are replaced with NaN. ``None`` disables
        the filter. Caution: the filter introduces lookahead bias.

    Returns
    -------
    forward_returns : pd.DataFrame - MultiIndex
        Indexed by (date, asset), one column per forward window. Each
        column is named with ``timedelta_to_string`` applied to the window
        length; the 'date' level gets ``BDay`` as freq when an irregular
        calendar was detected and ``Day`` otherwise.
    """

    # dates without pricing data cannot produce a forward return
    factor_idx = factor_idx.intersection(prices.index)

    result_index = pd.MultiIndex.from_product(
        [factor_idx, prices.columns], names=['date', 'asset'])
    forward_returns = pd.DataFrame(index=result_index)

    price_dates = prices.index
    custom_calendar = False

    for period in periods:

        # return realised `period` rows after each factor date
        fwd = prices.pct_change(period).shift(-period).reindex(factor_idx)

        if filter_zscore is not None:
            deviation = abs(fwd - fwd.mean())
            outliers = deviation > (filter_zscore * fwd.std())
            fwd[outliers] = np.nan

        # an uneven wall-clock span for the same row count means the prices
        # follow a trading/business-day calendar
        spans = price_dates.to_series().diff(period).reindex(factor_idx)
        if spans.min() != spans.max():
            custom_calendar = True

        # the span measured at the first factor date names the column
        first_loc = price_dates.get_loc(fwd.index[0])
        period_len = price_dates[first_loc + period] - price_dates[first_loc]

        # approximate the trading calendar with business days: sample up to
        # 50 windows and take the most frequent business-day count
        if custom_calendar and period_len.components.days > 0:
            n_samples = min(50, len(fwd.index) - period)
            sample_locs = (price_dates.get_loc(fwd.index[k])
                           for k in range(n_samples))
            days_diffs = [
                len(pd.bdate_range(price_dates[loc],
                                   price_dates[loc + period])) - 1
                for loc in sample_locs
            ]

            typical_days = mode(days_diffs).mode[0]
            surplus_days = period_len.components.days - typical_days
            period_len = period_len - pd.Timedelta(days=surplus_days)

        forward_returns[timedelta_to_string(period_len)] = fwd.stack()

    forward_returns.index = forward_returns.index.rename(['date', 'asset'])

    # Business days stand in for the trading calendar. Should that prove too
    # coarse, a pandas AbstractHolidayCalendar could be inferred from the
    # missing dates in the price frame and turned into a CustomBusinessDay
    # offset to use as the index freq instead.
    if custom_calendar:
        freq = BDay()
    else:
        freq = Day()
    forward_returns.index.levels[0].freq = freq

    return forward_returns
コード例 #12
0
# Transformations: string identifiers for each supported series transform
VALUE = 'value'
CHANGE = 'change'
PCT_CHANGE = 'percent_change'
CHANGE_YEAR_AGO = 'change_a_year_ago'
PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago'
CHANGE_BEG_YEAR = 'change_since_beginning_of_year'
PCT_CHANGE_BEG_YEAR = 'percent_change_since_beginning_of_year'

# Pandas freqs: one offset object per supported periodicity.  The semester
# is expressed as a six-month MonthBegin step and the week as a 7-day step.
PANDAS_YEAR = YearBegin()
PANDAS_SEMESTER = MonthBegin(6)
PANDAS_QUARTER = QuarterBegin(startingMonth=1)
PANDAS_MONTH = MonthBegin()
PANDAS_WEEK = Day(7)
PANDAS_DAY = Day()

# Frequencies *in order*, from the largest period to the smallest
PANDAS_FREQS = [
    PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK,
    PANDAS_DAY
]

# Metadata key names
IDENTIFIER = "identifier"
DATASET_IDENTIFIER = "dataset_identifier"
DOWNLOAD_URL = "downloadURL"

# Names of the metadata levels
DATASET = 'dataset'
DISTRIBUTION = 'distribution'
FIELD = 'field'
コード例 #13
0
    'Q': 'Q-DEC',

    'A': 'A-DEC',  # YearEnd(month=12),
    'AS': 'AS-JAN',  # YearBegin(month=1),
    'BA': 'BA-DEC',  # BYearEnd(month=12),
    'BAS': 'BAS-JAN',  # BYearBegin(month=1),

    'Min': 'T',
    'min': 'T',
    'ms': 'L',
    'us': 'U',
    'ns': 'N'
}


# Maps a plural time-unit name to the one-step offset of the matching unit.
# NOTE(review): presumably consulted by to_offset when it is handed a
# timedelta-like object -- confirm against the to_offset body.
_name_to_offset_map = {'days': Day(1),
                       'hours': Hour(1),
                       'minutes': Minute(1),
                       'seconds': Second(1),
                       'milliseconds': Milli(1),
                       'microseconds': Micro(1),
                       'nanoseconds': Nano(1)}


# Message template for rejected frequencies; the {0} placeholder takes the
# offending value (presumably via str.format at the raise site -- not
# visible in this excerpt).
_INVALID_FREQ_ERROR = "Invalid frequency: {0}"


@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq')
def to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
コード例 #14
0
ファイル: test_ops.py プロジェクト: ziggi0703/pandas
class TestTimedeltaIndexOps(Ops):
    """Behavioural tests for ``TimedeltaIndex`` operations.

    Extends the shared ``Ops`` base class (defined outside this excerpt) and
    narrows its fixture objects to the ``TimedeltaIndex`` instances.
    """

    def setup_method(self, method):
        """Run the base setup, then keep only ``TimedeltaIndex`` fixtures."""
        super(TestTimedeltaIndexOps, self).setup_method(method)
        self.is_valid_objs = [o for o in self.objs
                              if isinstance(o, TimedeltaIndex)]
        self.not_valid_objs = []

    def test_ops_properties(self):
        """Field and object properties are checked on TimedeltaIndex only."""
        def is_tdi(obj):
            return isinstance(obj, TimedeltaIndex)

        self.check_ops_properties(TimedeltaIndex._field_ops, is_tdi)
        self.check_ops_properties(TimedeltaIndex._object_ops, is_tdi)

    def test_minmax(self):
        """min/max/argmin/argmax skip NaT; all-NaT or empty input gives NaT."""

        # monotonic
        idx1 = TimedeltaIndex(['1 days', '2 days', '3 days'])
        assert idx1.is_monotonic

        # non-monotonic
        idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT'])
        assert not idx2.is_monotonic

        for idx in [idx1, idx2]:
            assert idx.min() == Timedelta('1 days')
            assert idx.max() == Timedelta('3 days')
            assert idx.argmin() == 0
            assert idx.argmax() == 2

        for op in ['min', 'max']:
            # Return NaT
            obj = TimedeltaIndex([])
            assert pd.isna(getattr(obj, op)())

            obj = TimedeltaIndex([pd.NaT])
            assert pd.isna(getattr(obj, op)())

            obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
            assert pd.isna(getattr(obj, op)())

    def test_numpy_minmax(self):
        """numpy reductions dispatch to the index and reject ``out``."""
        dr = pd.date_range(start='2016-01-15', end='2016-01-20')
        td = TimedeltaIndex(np.asarray(dr))

        # the same message is expected from all four reductions
        errmsg = "the 'out' parameter is not supported"

        assert np.min(td) == Timedelta('16815 days')
        assert np.max(td) == Timedelta('16820 days')

        tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0)
        tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0)

        assert np.argmin(td) == 0
        assert np.argmax(td) == 5

        tm.assert_raises_regex(ValueError, errmsg, np.argmin, td, out=0)
        tm.assert_raises_regex(ValueError, errmsg, np.argmax, td, out=0)

    def test_value_counts_unique(self):
        """value_counts/unique agree between the index and a Series of it."""
        # GH 7735

        idx = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex([
            '1 days 09:00:00', '1 days 09:00:00', '1 days 09:00:00',
            '1 days 08:00:00', '1 days 08:00:00', pd.NaT
        ])

        # NaT is dropped by default ...
        exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00'])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # ... and counted when dropna=False
        exp_idx = TimedeltaIndex(
            ['1 days 09:00:00', '1 days 08:00:00', pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """``in`` works on indexes that contain duplicate values."""
        # GH 9512
        cases = (
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ['00:01:00', '00:01:00', '00:02:00'],
            ['00:01:00', '00:01:00', '00:00:01'],
        )
        for idx in map(TimedeltaIndex, cases):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Unknown attribute access on a tdi-indexed Series raises."""
        # see gh-9680
        tdi = pd.timedelta_range(start=0, periods=10, freq='1s')
        ts = pd.Series(np.random.normal(size=10), index=tdi)
        assert 'foo' not in ts.__dict__.keys()
        with pytest.raises(AttributeError):
            ts.foo

    def test_order(self):
        """sort_values keeps freq for regular input and drops it otherwise."""
        # GH 10295
        idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'],
                              freq='D',
                              name='idx')
        idx2 = TimedeltaIndex(['1 hour', '2 hour', '3 hour'],
                              freq='H',
                              name='idx')

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer,
                                        np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            # the descending indexer is the ascending one reversed
            tm.assert_numpy_array_equal(indexer,
                                        np.array([2, 1, 0]),
                                        check_dtype=False)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ['1 hour', '3 hour', '5 hour', '2 hour ', '1 hour'], name='idx1')
        exp1 = TimedeltaIndex(
            ['1 hour', '1 hour', '2 hour', '3 hour', '5 hour'], name='idx1')

        idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', '2 day', '1 day'],
                              name='idx2')
        exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', '3 day', '5 day'],
                              name='idx2')

        # TODO: a NaT-containing case is not exercised yet:
        # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
        #                        '2 minute', pd.NaT], name='idx3')
        # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
        #                        '5 minute'], name='idx3')

        # both inputs share the same value pattern (1, 3, 5, 2, 1), so the
        # expected indexers below apply to each of them
        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """drop_duplicates keeps freq on unique input, not after append."""
        # GH 10115
        idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        """drop_duplicates behaves the same on the index and on a Series."""
        # to check Index/Series compat
        base = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep='last')
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep='last')
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        'freq', ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S'])
    def test_infer_freq(self, freq):
        """freq='infer' recovers the frequency from raw i8 values."""
        # GH#11018
        idx = pd.timedelta_range('1', freq=freq, periods=10)
        result = pd.TimedeltaIndex(idx.asi8, freq='infer')
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_shift(self):
        pass  # handled in test_arithmetic.py

    def test_repeat(self):
        """repeat (method and numpy function) drops freq."""
        index = pd.timedelta_range('1 days', periods=2, freq='D')
        exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days'])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(['1 days', 'NaT', '3 days'])
        exp = TimedeltaIndex([
            '1 days', '1 days', '1 days', 'NaT', 'NaT', 'NaT', '3 days',
            '3 days', '3 days'
        ])
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        """NaT is the NA value and is reported by the NaN helpers."""
        assert pd.TimedeltaIndex._na_value is pd.NaT
        assert pd.TimedeltaIndex([])._na_value is pd.NaT

        idx = pd.TimedeltaIndex(['1 days', '2 days'])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.TimedeltaIndex(['1 days', 'NaT'])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    def test_equals(self):
        """equals compares values across dtypes but rejects non-Index."""
        # GH 13107
        idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT'])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

    @pytest.mark.parametrize('values', [['0 days', '2 days', '4 days'], []])
    @pytest.mark.parametrize('freq', ['2D', Day(2), '48H', Hour(48)])
    def test_freq_setter(self, values, freq):
        """freq accepts a conforming string or offset and can be cleared."""
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Non-conforming, non-fixed and unparseable freqs are rejected."""
        # GH 20678
        idx = TimedeltaIndex(['0 days', '2 days', '4 days'])

        # setting with an incompatible freq
        msg = ('Inferred frequency 2D from passed values does not conform to '
               'passed frequency 5D')
        with tm.assert_raises_regex(ValueError, msg):
            idx.freq = '5D'

        # setting with a non-fixed frequency
        msg = r'<2 \* BusinessDays> is a non-fixed frequency'
        with tm.assert_raises_regex(ValueError, msg):
            idx.freq = '2B'

        # setting with non-freq string
        with tm.assert_raises_regex(ValueError, 'Invalid frequency'):
            idx.freq = 'foo'
コード例 #15
0
    """
    In 2010 Independence Day fell on a Saturday. Normally this would mean that
    Friday is a half day, but instead it is a full day off, so we need to
    exclude it from the usual half day rules.
    """
    return holidays[holidays.year != 2010]


# Holiday rules.  The lower-case factory functions called here are defined
# outside this excerpt.
NewYearsDay = new_years_day()

MaundyThursday = maundy_thursday()
# Anchored on 1 January and then moved by the two offsets in order: first
# Easter(), then 57 days (8 weeks + 1 day).  end_date bounds the rule to
# dates up to 2008.
MondayPriorToCorpusChristi = Holiday(
    "Monday Prior to Corpus Christi",
    month=1,
    day=1,
    offset=[Easter(), Day(57)],
    end_date="2008",
)

LabourDay = european_labour_day()

# Fixed date: 21 May.
NavyDay = Holiday("Navy Day", month=5, day=21)

# The nearest_monday observance is handed to the factory.
SaintPeterAndSaintPaulDay = saint_peter_and_saint_paul_day(
    observance=nearest_monday, )

OurLadyOfMountCarmelDay = Holiday(
    "Our Lady of Mount Carmel's Day",
    month=7,
    day=16,
    start_date="2008",
コード例 #16
0
    whit_monday,
    christmas_eve,
    christmas,
    boxing_day,
    new_years_eve,
)
from .trading_calendar import HolidayCalendar, TradingCalendar

# Holiday rules.  The lower-case factory functions called here come from the
# import list above this point.
NewYearsDay = new_years_day()

MaundyThursday = maundy_thursday()
# Anchored on 1 January and then moved by the two offsets in order: first
# Easter(), then 26 days (3 weeks + 5 days).
GeneralPrayerDay = Holiday(
    'General Prayer Day',
    month=1,
    day=1,
    offset=[Easter(), Day(26)],
)
AscensionDay = ascension_day()
# Same construction with a 40-day step after Easter(); the rule only
# applies from start_date (2009) onwards.
BankHoliday = Holiday(
    'Bank Holiday',
    month=1,
    day=1,
    offset=[Easter(), Day(40)],
    start_date='2009',
)
WhitMonday = whit_monday()

# Fixed date: 5 June.
ConstitutionDay = Holiday('Constitution Day', month=6, day=5)

ChristmasEve = christmas_eve()
Christmas = christmas()
コード例 #17
0
    # when, for no explicable reason, Wednesday was a half day instead).
    "Fridays after Independence Day that aren't in 2013",
    month=7,
    day=5,
    days_of_week=(FRIDAY, ),
    observance=july_5th_holiday_observance,
    start_date=Timestamp("1995-01-01"),
)
# Black Friday rule limited to the window 1992-01-01 .. 1993-01-01.
# The date is derived from 1 November by applying DateOffset(weekday=TH(4))
# and then adding one day.
USBlackFridayBefore1993 = Holiday(
    'Black Friday',
    month=11,
    day=1,
    # Black Friday was not observed until 1992.
    start_date=Timestamp('1992-01-01'),
    end_date=Timestamp('1993-01-01'),
    offset=[DateOffset(weekday=TH(4)), Day(1)],
)
# Same date construction as above, open-ended from 1993-01-01 onwards.
USBlackFridayInOrAfter1993 = Holiday(
    'Black Friday',
    month=11,
    day=1,
    start_date=Timestamp('1993-01-01'),
    offset=[DateOffset(weekday=TH(4)), Day(1)],
)
# One-off closure bounded to 1863-07-01 .. 1863-07-03; note that `day` is
# given as a tuple of three days rather than a single integer.
BattleOfGettysburg = Holiday(
    # All of the floor traders in Chicago were sent to PA
    'Markets were closed during the battle of Gettysburg',
    month=7,
    day=(1, 2, 3),
    start_date=Timestamp("1863-07-01"),
    end_date=Timestamp("1863-07-03"))
コード例 #18
0
ファイル: person_portrait.py プロジェクト: miaomiaoliyi/xcrs
 def count_date(date):
     """Return, as a ``YYYYMMDD`` integer, the day 294 days before *date*.

     *date* is converted to a string and parsed with the ``%Y%m%d`` format.
     """
     from pandas.tseries.offsets import Day

     parsed = pd.to_datetime(str(date), format='%Y%m%d')
     shifted = parsed - Day(294)
     return int(shifted.strftime('%Y%m%d'))
コード例 #19
0
class DianPingHotelScrapy(scrapy.Spider):
    """Crawl the Dianping Fuzhou hotel listing and yield one item per hotel.

    Each listing entry becomes a ``DianPingHotelItem`` that is completed
    with fields scraped from the hotel's detail page.
    """

    # spider name
    name = 'DianPingHotelScrapy'
    # number of listing pages to crawl (at most 50)
    max_page = 5
    # listing URLs for pages 1..max_page inclusive; the upper bound is
    # max_page + 1 because range() excludes its stop value
    start_urls = ['http://www.dianping.com/fuzhou/hotel/p%d' % x
                  for x in range(1, max_page + 1)]

    # request headers used for the listing requests
    headers = {'Accept': '*/*',
               'Accept-Encoding': 'gzip, deflate, br',
               'Accept-Language': 'zh-CN,zh;q=0.9',
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
               'Connection': 'keep-alive',
               'Referer': 'https://www.baidu.com',
               'Host': 'www.dianping.com',
               }

    # NOTE: both dates are evaluated once, when the class body is executed,
    # not per request
    NOW_TIME = datetime.datetime.now().strftime('%Y-%m-%d')
    NEXT_DAY_TIME = (datetime.datetime.now() + Day()).strftime('%Y-%m-%d')

    def start_requests(self):
        """Issue one listing request per entry in ``start_urls``."""
        for url in self.start_urls:
            yield scrapy.Request(url=url,
                                 headers=self.headers,
                                 callback=self.parse_list,
                                 dont_filter=False)

    def parse_list(self, response):
        """Create an item per hotel block and follow its detail page."""
        for hotel in response.css('ul.hotelshop-list li.hotel-block'):
            item = DianPingHotelItem()
            # hotel name
            item['name'] = hotel.css('.hotel-name-link::text').extract_first()
            # hotel id
            item['id'] = hotel.css('li::attr(data-poi)').extract_first()
            # lowest room price: the page refreshes prices through JS after
            # rendering, so scraping the markup would give a wrong value
            item['price'] = GetHotelDetailInformation(
                item['id'], self.NOW_TIME, self.NEXT_DAY_TIME)[0]['price']
            # detail page URL for the next crawl step
            item['url'] = 'http://www.dianping.com/shop/' + item['id']
            # the item is yielded from parse_detail, not from here
            yield scrapy.Request(item['url'],
                                 meta={'item': item},
                                 callback=self.parse_detail)

    def parse_detail(self, response):
        """Fill in the detail-page fields of the item and yield it."""
        # item carried over from the listing page
        item = response.meta['item']
        # hotel location
        item['place'] = response.css('span.hotel-address::text').extract_first()
        # rating
        item['score'] = response.css('span.score::text').extract_first()
        # contact details
        item['contact'] = response.css('.info-value::text').extract_first()
        # opening date: second "info-value" entry of the info list
        info_values = response.xpath(
            '//ul[@class="list-info"]/li/div[@class="info-value"]/text()'
        ).extract()
        item['destablishment_data'] = info_values[1]
        print(info_values[1])
        # number of reviews; [1:-1] strips the surrounding brackets
        item['remark_number'] = response.css(
            '#comment .count::text').extract_first()[1:-1]
        # share of good reviews, in percent, from the first two filter counts
        filter_counts = response.css(
            'a[data-filter] span.count::text').extract()
        first_count = int(filter_counts[0][1:-1])
        if first_count != 0:
            second_count = int(filter_counts[1][1:-1])
            total = int(item['remark_number'])
            # a zero total would otherwise raise ZeroDivisionError
            if total != 0:
                item['good_ratio'] = int(
                    (first_count + second_count) / total * 100)
            else:
                item['good_ratio'] = 0
        else:
            item['good_ratio'] = 0
        # Room-type list via the API (defaults to the next day's prices);
        # not collected for now.
        # now_time = datetime.datetime.now().strftime('%Y-%m-%d')
        # next_day_time = (datetime.datetime.now() + Day()).strftime('%Y-%m-%d')
        # item['room_type_list'] = GetHotelDetailInformation(item['id'],now_time,next_day_time)

        yield item
コード例 #20
0
ファイル: test_ops.py プロジェクト: Aathi410/Pro123
class TestDatetimeIndexOps:
    """Attribute access, resolution, freq inference and freq-setter tests
    for ``DatetimeIndex``."""

    def test_ops_properties_basic(self, datetime_series):
        # GH#7206
        # guard against regressions: these names are not Series attributes
        for field in ["year", "day", "second", "weekday"]:
            expected_msg = f"'Series' object has no attribute '{field}'"
            with pytest.raises(AttributeError, match=expected_msg):
                getattr(datetime_series, field)

        # index labels must stay reachable through attribute access
        labelled = Series({"year": 2000, "month": 1, "day": 10})
        assert labelled.year == 2000
        assert labelled.month == 1
        assert labelled.day == 10
        expected_msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=expected_msg):
            labelled.weekday

    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, request, tz_naive_fixture, freq, expected):
        tz = tz_naive_fixture
        overflow_expected = (
            freq == "A" and not IS64 and isinstance(tz, tzlocal)
        )
        if overflow_expected:
            xfail_mark = pytest.mark.xfail(
                reason="OverflowError inside tzlocal past 2038"
            )
            request.node.add_marker(xfail_mark)

        rng = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert rng.resolution == expected

    def test_infer_freq(self, freq_sample):
        # GH 11018
        original = date_range(
            "2011-01-01 09:00:00", freq=freq_sample, periods=10
        )
        rebuilt = DatetimeIndex(original.asi8, freq="infer")
        tm.assert_index_equal(original, rebuilt)
        assert rebuilt.freq == freq_sample

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        dti = DatetimeIndex(values, tz=tz)

        # strings are converted to an offset on assignment
        dti._data.freq = freq
        assert dti.freq == freq
        assert isinstance(dti.freq, DateOffset)

        # assigning None clears the freq again
        dti._data.freq = None
        assert dti.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        dti = DatetimeIndex(["20180101", "20180103", "20180105"])

        # a freq that the values do not conform to is rejected
        mismatch_msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=mismatch_msg):
            dti._data.freq = "5D"

        # so is a string that is not a frequency at all
        with pytest.raises(ValueError, match="Invalid frequency"):
            dti._data.freq = "foo"

    def test_freq_view_safe(self):
        # Dropping the freq on one DatetimeIndex must leave another index
        #  that views the same data untouched

        source = date_range("2016-01-01", periods=5)
        shared = source._data

        stripped = DatetimeIndex(shared)._with_freq(None)
        assert stripped.freq is None

        # neither the source index nor its array lost the freq
        assert source.freq == "D"
        assert shared.freq == "D"
コード例 #21
0
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):
        """Datetime field names are not attributes of a plain Series (GH#7206).

        Label-based attribute access must keep working when the index holds
        labels such as ``year``.
        """
        # sanity check that the behavior didn't change
        for field in ("year", "day", "second", "weekday"):
            expected_msg = f"'Series' object has no attribute '{field}'"
            with pytest.raises(AttributeError, match=expected_msg):
                getattr(datetime_series, field)

        # attribute access should still work!
        labels = {"year": 2000, "month": 1, "day": 10}
        ser = Series(labels)
        for label, value in labels.items():
            assert getattr(ser, label) == value

        # ... but a label that is absent still raises.
        expected_msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=expected_msg):
            ser.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """``repeat`` drops the freq, both as a method and through ``np.repeat``."""
        tz = tz_naive_fixture

        def assert_repeats(index, reps, expected):
            # The method and the numpy function must agree and carry no freq.
            for repeated in (index.repeat(reps), np.repeat(index, reps)):
                tm.assert_index_equal(repeated, expected)
                assert repeated.freq is None

        year = date_range("1/1/2000", "1/1/2001")
        repeated_year = year.repeat(5)
        assert repeated_year.freq is None
        assert len(repeated_year) == 5 * len(year)

        # daily range
        daily = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        expected_daily = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        )
        assert_repeats(daily, 2, expected_daily)

        # range with a multiple-day step
        two_daily = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        expected_two_daily = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        )
        assert_repeats(two_daily, 2, expected_two_daily)

        # irregular values including NaT
        with_nat = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        expected_with_nat = pd.DatetimeIndex(
            ["2001-01-01"] * 3 + ["NaT"] * 3 + ["2003-01-01"] * 3,
            tz=tz,
        )
        assert_repeats(with_nat, 3, expected_with_nat)

    def test_repeat(self, tz_naive_fixture):
        """Repeating a 30-minute range duplicates each stamp and drops the freq.

        ``np.repeat`` must give the same result and reject an ``axis`` argument.
        """
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        # Each stamp appears ``reps`` times, in the original order.
        expected_rng = DatetimeIndex(
            [
                Timestamp(stamp, tz=tz, freq="30T")
                for stamp in ("2016-01-01 00:00:00", "2016-01-01 00:30:00")
                for _ in range(reps)
            ]
        )

        via_method = rng.repeat(reps)
        tm.assert_index_equal(via_method, expected_rng)
        assert via_method.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        """Check ``resolution`` for ranges built with each frequency alias."""
        tz = tz_naive_fixture
        # (range freq, expected resolution); daily and coarser all report "day".
        cases = [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ]
        for freq, expected in cases:
            idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` / ``unique`` on repeated and NaT-containing values.

        GH 7735.
        """
        tz = tz_naive_fixture

        hourly = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # The n-th element occurs n + 1 times, so the latest hour is most frequent.
        repeated = DatetimeIndex(
            np.repeat(hourly.values, range(1, len(hourly) + 1)), tz=tz
        )

        descending = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected_counts = Series(range(10, 0, -1), index=descending, dtype="int64")
        expected_counts.index = expected_counts.index._with_freq(None)

        for obj in (repeated, Series(repeated)):
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        expected_unique = pd.date_range(
            "2011-01-01 09:00", freq="H", periods=10, tz=tz
        )._with_freq(None)
        tm.assert_index_equal(repeated.unique(), expected_unique)

        # Irregular values with a missing entry.
        nine, eight = "2013-01-01 09:00", "2013-01-01 08:00"
        with_nat = DatetimeIndex([nine] * 3 + [eight] * 2 + [pd.NaT], tz=tz)

        # NaT is excluded by default ...
        expected_counts = Series([3, 2], index=DatetimeIndex([nine, eight], tz=tz))
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        # ... and counted when dropna=False.
        index_with_nat = DatetimeIndex([nine, eight, pd.NaT], tz=tz)
        expected_counts = Series([3, 2, 1], index=index_with_nat)
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(dropna=False), expected_counts)

        tm.assert_index_equal(with_nat.unique(), index_with_nat)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        """Sorting an already ordered index keeps (or negates) its freq."""
        reversed_idx = idx[::-1]

        # Ascending: unchanged, same freq.
        result = idx.sort_values()
        tm.assert_index_equal(result, idx)
        assert result.freq == idx.freq

        # Descending: reversed values with a negative step.
        result = idx.sort_values(ascending=False)
        tm.assert_index_equal(result, reversed_idx)
        assert result.freq == reversed_idx.freq
        assert result.freq.n == -1

        # Same two checks, additionally validating the returned indexer.
        result, positions = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(result, idx)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 1, 2]), check_dtype=False
        )
        assert result.freq == idx.freq

        result, positions = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(result, reversed_idx)
        tm.assert_numpy_array_equal(
            positions, np.array([2, 1, 0]), check_dtype=False
        )
        assert result.freq == reversed_idx.freq
        assert result.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            # Unsorted dates with a duplicate.  (A second, byte-identical copy of
            # this case used to follow; it exercised nothing new and was removed.)
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            # Missing values sort first.
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        """Sorting an irregular index orders the values and never produces a freq.

        Parameters
        ----------
        index_dates : list
            Unsorted input values.
        expected_dates : list
            The same values in ascending order.
        tz_naive_fixture
            Time zone (or None) applied to both indexes.
        """
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        # Both parametrized inputs share the same sort permutation.
        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """``drop_duplicates`` keeps the freq only when nothing was dropped.

        GH 10115.
        """
        unique_idx = pd.date_range(
            "2011-01-01", freq=freq_sample, periods=10, name="idx"
        )
        deduped = unique_idx.drop_duplicates()
        tm.assert_index_equal(unique_idx, deduped)
        assert unique_idx.freq == deduped.freq

        doubled = unique_idx.append(unique_idx)
        assert doubled.freq is None  # freq is reset

        deduped = doubled.drop_duplicates()
        tm.assert_index_equal(deduped, unique_idx._with_freq(None))
        assert deduped.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """``duplicated`` / ``drop_duplicates`` agree between Index and Series.

        ``expected`` is the boolean duplicate mask for the given ``keep`` and
        ``index`` the positions that survive in the Series case.
        """
        # to check Index/Series compat
        base = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        # Ten unique values followed by a copy of the first five.
        with_dups = base.append(base[:5])

        tm.assert_numpy_array_equal(with_dups.duplicated(keep=keep), expected)
        survivors = with_dups[~expected]

        tm.assert_index_equal(with_dups.drop_duplicates(keep=keep), survivors)

        series_result = Series(with_dups).drop_duplicates(keep=keep)
        tm.assert_series_equal(series_result, Series(survivors, index=index))

    def test_infer_freq(self, freq_sample):
        """Check that ``freq="infer"`` recovers the freq of a regular range.

        GH 11018.
        """
        original = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        # Only the ``asi8`` values are handed over, so the freq must be inferred.
        rebuilt = pd.DatetimeIndex(original.asi8, freq="infer")
        tm.assert_index_equal(original, rebuilt)
        assert rebuilt.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        """NaT is the NA value and is reported by the NaN-related helpers."""
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        # (values, expected _isnan mask, expected hasnans, expected NaT positions)
        cases = [
            (["2011-01-01", "2011-01-02"], [False, False], False, []),
            (["2011-01-01", "NaT"], [False, True], True, [1]),
        ]
        for values, isnan_mask, has_nans, nat_positions in cases:
            idx = pd.DatetimeIndex(values, tz=tz)
            assert idx._can_hold_na

            tm.assert_numpy_array_equal(idx._isnan, np.array(isnan_mask))
            assert idx.hasnans is has_nans
            tm.assert_numpy_array_equal(
                idx._nan_idxs, np.array(nat_positions, dtype=np.intp)
            )

    def test_equals(self):
        """``equals`` across copies, object dtype, containers and time zones.

        GH 13107.
        """
        naive = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        naive_obj = naive.astype(object)

        # Equal to itself, a copy and the object-dtype version (both directions).
        for same in (naive, naive.copy(), naive_obj):
            assert naive.equals(same)
        assert naive_obj.equals(naive)
        assert naive_obj.equals(naive.astype(object))
        # Non-Index containers never compare equal.
        for container in (list(naive), Series(naive)):
            assert not naive.equals(container)

        def assert_differs_from_naive(other):
            # ``other`` in any of its forms must not equal the naive index.
            candidates = (
                other,
                other.copy(),
                other.astype(object),
                list(other),
                Series(other),
            )
            for candidate in candidates:
                assert not naive.equals(candidate)
            assert not naive_obj.equals(other)

        # Same wall times, different tz.
        aware = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert_differs_from_naive(aware)

        # same internal, different tz
        same_i8 = pd.DatetimeIndex(naive.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(naive.asi8, same_i8.asi8)
        assert_differs_from_naive(same_i8)

        indexes = (naive, aware, same_i8)

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        for index in indexes:
            assert not index.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        for index in indexes:
            assert not index.equals(oob2)

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """Setting ``freq`` on the backing array accepts offsets, strings and None.

        GH 20678.
        """
        index = DatetimeIndex(values, tz=tz)

        # A string is converted on assignment, so the getter yields a DateOffset.
        index._data.freq = freq
        assert index.freq == freq
        assert isinstance(index.freq, DateOffset)

        # Assigning None clears the freq again.
        index._data.freq = None
        assert index.freq is None

    def test_freq_setter_errors(self):
        """Assigning a non-conforming or unparsable freq raises ValueError.

        GH 20678.
        """
        index = DatetimeIndex(["20180101", "20180103", "20180105"])

        # The values are two days apart, so a 5D freq cannot describe them.
        mismatch_msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=mismatch_msg):
            index._data.freq = "5D"

        # A string that is not a frequency alias is rejected as well.
        with pytest.raises(ValueError, match="Invalid frequency"):
            index._data.freq = "foo"

    def test_freq_view_safe(self):
        """Dropping the freq on one index must not touch objects sharing its data."""
        source = pd.date_range("2016-01-01", periods=5)
        shared_array = source._data

        # Build a second index from the same array and strip its freq.
        freqless = DatetimeIndex(shared_array)._with_freq(None)
        assert freqless.freq is None

        # Neither the original index nor its array was altered.
        for holder in (source, shared_array):
            assert holder.freq == "D"
コード例 #22
0
class TestDatetimeIndexOps(Ops):
    def setup_method(self, method):
        """Split the objects prepared by the base class by index type.

        ``is_valid_objs`` receives the DatetimeIndex / PeriodIndex instances
        from ``self.objs``; ``not_valid_objs`` receives everything else.
        """
        super().setup_method(method)

        def is_datetimelike(candidate):
            return isinstance(candidate, (DatetimeIndex, PeriodIndex))

        self.is_valid_objs = [obj for obj in self.objs if is_datetimelike(obj)]
        self.not_valid_objs = [obj for obj in self.objs if not is_datetimelike(obj)]

    def test_ops_properties(self):
        """Run the shared property checks for every DatetimeIndex op group."""

        def only_datetime_index(candidate):
            return isinstance(candidate, DatetimeIndex)

        # Field, object and boolean ops are checked in that order.
        self.check_ops_properties(DatetimeIndex._field_ops, only_datetime_index)
        self.check_ops_properties(DatetimeIndex._object_ops, only_datetime_index)
        self.check_ops_properties(DatetimeIndex._bool_ops, only_datetime_index)

    def test_ops_properties_basic(self):
        """Datetime field names are not attributes of a plain Series (GH#7206).

        Label-based attribute access must keep working when the index holds
        labels such as ``year``.
        """
        # sanity check that the behavior didn't change
        template = "'Series' object has no attribute '{}'"
        for field in ('year', 'day', 'second', 'weekday'):
            with pytest.raises(AttributeError, match=template.format(field)):
                getattr(self.dt_series, field)

        # attribute access should still work!
        labels = {'year': 2000, 'month': 1, 'day': 10}
        ser = Series(labels)
        for label, value in labels.items():
            assert getattr(ser, label) == value

        # ... but a label that is absent still raises.
        expected_msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=expected_msg):
            ser.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """``repeat`` drops the freq, both as a method and through ``np.repeat``."""
        tz = tz_naive_fixture

        def assert_repeats(index, reps, expected):
            # The method and the numpy function must agree and carry no freq.
            for repeated in (index.repeat(reps), np.repeat(index, reps)):
                tm.assert_index_equal(repeated, expected)
                assert repeated.freq is None

        year = date_range('1/1/2000', '1/1/2001')
        repeated_year = year.repeat(5)
        assert repeated_year.freq is None
        assert len(repeated_year) == 5 * len(year)

        # daily range
        daily = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz)
        expected_daily = pd.DatetimeIndex(
            ['2001-01-01', '2001-01-01', '2001-01-02', '2001-01-02'], tz=tz)
        assert_repeats(daily, 2, expected_daily)

        # range with a multiple-day step
        two_daily = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz)
        expected_two_daily = pd.DatetimeIndex(
            ['2001-01-01', '2001-01-01', '2001-01-03', '2001-01-03'], tz=tz)
        assert_repeats(two_daily, 2, expected_two_daily)

        # irregular values including NaT
        with_nat = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], tz=tz)
        expected_with_nat = pd.DatetimeIndex(
            ['2001-01-01'] * 3 + ['NaT'] * 3 + ['2003-01-01'] * 3, tz=tz)
        assert_repeats(with_nat, 3, expected_with_nat)

    def test_repeat(self, tz_naive_fixture):
        """Repeating a 30-minute range duplicates each stamp and drops the freq.

        ``np.repeat`` must give the same result and reject an ``axis`` argument.
        """
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start='2016-01-01', periods=2, freq='30Min', tz=tz)

        # Each stamp appears ``reps`` times, in the original order.
        expected_rng = DatetimeIndex([
            Timestamp(stamp, tz=tz, freq='30T')
            for stamp in ('2016-01-01 00:00:00', '2016-01-01 00:30:00')
            for _ in range(reps)
        ])

        via_method = rng.repeat(reps)
        tm.assert_index_equal(via_method, expected_rng)
        assert via_method.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        """Check ``resolution`` for ranges built with each frequency alias."""
        tz = tz_naive_fixture
        # (range freq, expected resolution); daily and coarser all report 'day'.
        cases = [
            ('A', 'day'),
            ('Q', 'day'),
            ('M', 'day'),
            ('D', 'day'),
            ('H', 'hour'),
            ('T', 'minute'),
            ('S', 'second'),
            ('L', 'millisecond'),
            ('U', 'microsecond'),
        ]
        for freq, expected in cases:
            idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` / ``unique`` on repeated and NaT-containing values.

        GH 7735.
        """
        tz = tz_naive_fixture

        hourly = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
        # The n-th element occurs n + 1 times, so the latest hour is most frequent.
        repeated = DatetimeIndex(
            np.repeat(hourly.values, range(1, len(hourly) + 1)), tz=tz)

        descending = pd.date_range(
            '2011-01-01 18:00', freq='-1H', periods=10, tz=tz)
        expected_counts = Series(
            range(10, 0, -1), index=descending, dtype='int64')

        for obj in (repeated, Series(repeated)):
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        expected_unique = pd.date_range(
            '2011-01-01 09:00', freq='H', periods=10, tz=tz)
        tm.assert_index_equal(repeated.unique(), expected_unique)

        # Irregular values with a missing entry.
        nine, eight = '2013-01-01 09:00', '2013-01-01 08:00'
        with_nat = DatetimeIndex([nine] * 3 + [eight] * 2 + [pd.NaT], tz=tz)

        # NaT is excluded by default ...
        expected_counts = Series(
            [3, 2], index=DatetimeIndex([nine, eight], tz=tz))
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        # ... and counted when dropna=False.
        index_with_nat = DatetimeIndex([nine, eight, pd.NaT], tz=tz)
        expected_counts = Series([3, 2, 1], index=index_with_nat)
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(
                obj.value_counts(dropna=False), expected_counts)

        tm.assert_index_equal(with_nat.unique(), index_with_nat)

    def test_nonunique_contains(self):
        """Membership works on indexes that contain duplicates (GH 9512)."""
        duplicated_inputs = [
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ['2015', '2015', '2016'],
            ['2015', '2015', '2014'],
        ]
        for values in duplicated_inputs:
            idx = DatetimeIndex(values)
            # The first element is one of the repeated values in every case.
            assert idx[0] in idx

    @pytest.mark.parametrize('idx', [
        DatetimeIndex(
            ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx'),
        DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'],
            freq='H',
            name='tzidx',
            tz='Asia/Tokyo')
    ])
    def test_order_with_freq(self, idx):
        """Sorting an already ordered index keeps (or negates) its freq."""
        reversed_idx = idx[::-1]

        # Ascending: unchanged, same freq.
        result = idx.sort_values()
        tm.assert_index_equal(result, idx)
        assert result.freq == idx.freq

        # Descending: reversed values with a negative step.
        result = idx.sort_values(ascending=False)
        tm.assert_index_equal(result, reversed_idx)
        assert result.freq == reversed_idx.freq
        assert result.freq.n == -1

        # Same two checks, additionally validating the returned indexer.
        result, positions = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(result, idx)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 1, 2]), check_dtype=False)
        assert result.freq == idx.freq

        result, positions = idx.sort_values(
            return_indexer=True, ascending=False)
        tm.assert_index_equal(result, reversed_idx)
        tm.assert_numpy_array_equal(
            positions, np.array([2, 1, 0]), check_dtype=False)
        assert result.freq == reversed_idx.freq
        assert result.freq.n == -1

    @pytest.mark.parametrize(
        'index_dates,expected_dates',
        [
            # Unsorted dates with a duplicate.  (A second, byte-identical copy
            # of this case used to follow; it exercised nothing new and was
            # removed.)
            ([
                '2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02',
                '2011-01-01'
            ], [
                '2011-01-01', '2011-01-01', '2011-01-02', '2011-01-03',
                '2011-01-05'
            ]),
            # Missing values sort first.
            ([pd.NaT, '2011-01-03', '2011-01-05', '2011-01-02', pd.NaT],
             [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', '2011-01-05']),
        ])
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        """Sorting an irregular index orders the values and never yields a freq.

        Parameters
        ----------
        index_dates : list
            Unsorted input values.
        expected_dates : list
            The same values in ascending order.
        tz_naive_fixture
            Time zone (or None) applied to both indexes.
        """
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name='idx')
        expected = DatetimeIndex(expected_dates, tz=tz, name='idx')

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        # Both parametrized inputs share the same sort permutation.
        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """``drop_duplicates`` keeps the freq only when nothing was dropped.

        GH 10115.
        """
        january = pd.date_range(
            '2011-01-01', '2011-01-31', freq='D', name='idx')
        deduped = january.drop_duplicates()
        tm.assert_index_equal(january, deduped)
        assert january.freq == deduped.freq

        doubled = january.append(january)
        assert doubled.freq is None  # freq is reset

        deduped = doubled.drop_duplicates()
        tm.assert_index_equal(january, deduped)
        assert deduped.freq is None

    def test_drop_duplicates(self):
        """``drop_duplicates`` agrees between Index and Series for every ``keep``."""
        # to check Index/Series compat
        base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
        # 31 unique days followed by a copy of the first five.
        idx = base.append(base[:5])

        def check(expected, positions=None, **kwargs):
            # Index result, then the Series result with its surviving positions.
            tm.assert_index_equal(idx.drop_duplicates(**kwargs), expected)
            tm.assert_series_equal(
                Series(idx).drop_duplicates(**kwargs),
                Series(expected, index=positions))

        # default: the first occurrence is kept
        check(base)
        # keep='last': the trailing copies survive instead of the leading ones
        check(base[5:].append(base[:5]), np.arange(5, 36), keep='last')
        # keep=False: every value that occurs twice is dropped
        check(base[5:], np.arange(5, 31), keep=False)

    @pytest.mark.parametrize('freq', [
        'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', '-3D', 'W', '-1W',
        'H', '2H', '-2H', 'T', '2T', 'S', '-3S'
    ])
    def test_infer_freq(self, freq):
        """Check that ``freq='infer'`` recovers the freq of a regular range.

        GH 11018.
        """
        original = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
        # Only the ``asi8`` values are handed over, so the freq must be inferred.
        rebuilt = pd.DatetimeIndex(original.asi8, freq='infer')
        tm.assert_index_equal(original, rebuilt)
        assert rebuilt.freq == freq

    def test_nat(self, tz_naive_fixture):
        """NaT is the NA value and is reported by the NaN-related helpers."""
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        # (values, expected _isnan mask, expected hasnans, expected NaT positions)
        cases = [
            (['2011-01-01', '2011-01-02'], [False, False], False, []),
            (['2011-01-01', 'NaT'], [False, True], True, [1]),
        ]
        for values, isnan_mask, has_nans, nat_positions in cases:
            idx = pd.DatetimeIndex(values, tz=tz)
            assert idx._can_hold_na

            tm.assert_numpy_array_equal(idx._isnan, np.array(isnan_mask))
            assert idx.hasnans is has_nans
            tm.assert_numpy_array_equal(
                idx._nan_idxs, np.array(nat_positions, dtype=np.intp))

    def test_equals(self):
        """``equals`` across copies, object dtype, containers and time zones.

        GH 13107.
        """
        naive = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
        naive_obj = naive.astype(object)

        # Equal to itself, a copy and the object-dtype version (both directions).
        for same in (naive, naive.copy(), naive_obj):
            assert naive.equals(same)
        assert naive_obj.equals(naive)
        assert naive_obj.equals(naive.astype(object))
        # Non-Index containers never compare equal.
        for container in (list(naive), pd.Series(naive)):
            assert not naive.equals(container)

        def assert_differs_from_naive(other):
            # ``other`` in any of its forms must not equal the naive index.
            candidates = (other, other.copy(), other.astype(object),
                          list(other), pd.Series(other))
            for candidate in candidates:
                assert not naive.equals(candidate)
            assert not naive_obj.equals(other)

        # Same wall times, different tz.
        aware = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
                                 tz='US/Pacific')
        assert_differs_from_naive(aware)

        # same internal, different tz
        same_i8 = pd.DatetimeIndex._simple_new(naive.asi8, tz='US/Pacific')
        tm.assert_numpy_array_equal(naive.asi8, same_i8.asi8)
        assert_differs_from_naive(same_i8)

    @pytest.mark.parametrize('values',
                             [['20180101', '20180103', '20180105'], []])
    @pytest.mark.parametrize(
        'freq', ['2D', Day(2), '2B',
                 BDay(2), '48H', Hour(48)])
    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
    def test_freq_setter(self, values, freq, tz):
        """The ``freq`` setter accepts offsets, strings and None (GH 20678)."""
        index = DatetimeIndex(values, tz=tz)

        # A string is converted on assignment, so the getter yields an offset.
        index.freq = freq
        assert index.freq == freq
        assert isinstance(index.freq, ABCDateOffset)

        # Assigning None clears the freq again.
        index.freq = None
        assert index.freq is None

    def test_freq_setter_errors(self):
        """Assigning a non-conforming or unparsable freq raises ValueError.

        GH 20678.
        """
        index = DatetimeIndex(['20180101', '20180103', '20180105'])

        # The values are two days apart, so a 5D freq cannot describe them.
        mismatch_msg = ('Inferred frequency 2D from passed values does not '
                        'conform to passed frequency 5D')
        with pytest.raises(ValueError, match=mismatch_msg):
            index.freq = '5D'

        # A string that is not a frequency alias is rejected as well.
        with pytest.raises(ValueError, match='Invalid frequency'):
            index.freq = 'foo'

    def test_offset_deprecated(self):
        """Reading or writing ``offset`` emits a FutureWarning (GH 20716)."""
        index = pd.DatetimeIndex(['20180101', '20180102'])

        # Reading the attribute warns ...
        with tm.assert_produces_warning(FutureWarning):
            index.offset

        # ... and so does assigning to it.
        with tm.assert_produces_warning(FutureWarning):
            index.offset = BDay()
コード例 #23
0
 def regular_holidays(self):
     """Build the calendar of recurring holidays.

     Every fixed-date rule uses the ``sunday_to_monday`` observance; Family
     Day is defined as one day after Easter.
     """

     def fixed(name, month, day):
         # Fixed-date holiday sharing the common observance rule.
         return Holiday(
             name,
             month=month,
             day=day,
             observance=sunday_to_monday,
         )

     family_day = Holiday(
         "Family Day",
         month=1,
         day=1,
         offset=[Easter(), Day(1)],
     )

     rules = [
         new_years_day(observance=sunday_to_monday),
         fixed("Human Rights Day", 3, 21),
         GoodFriday,
         family_day,
         fixed("Freedom Day", 4, 27),
         fixed("Workers' Day", 5, 1),
         fixed("Youth Day", 6, 16),
         fixed("National Women's Day", 8, 9),
         fixed("Heritage Day", 9, 24),
         fixed("Day of Reconciliation", 12, 16),
         fixed("Christmas", 12, 25),
         fixed("Day of Goodwill", 12, 26),
     ]
     return HolidayCalendar(rules)
コード例 #24
0
ファイル: holiday.py プロジェクト: ssmall41/pytorch-ts
    def distance_to_day(index):
        """Return the number of days from the holiday to ``index``.

        The holiday is looked up within ``MAX_WINDOW`` days on either side of
        ``index``; the result is positive when ``index`` falls after it.
        """
        window = pd.Timedelta(days=MAX_WINDOW)
        occurrences = holiday.dates(index - window, index + window)
        assert (
            len(occurrences) != 0
        ), f"No closest holiday for the date index {index} found."
        # Two dates can come back when ``index`` is exactly half a year after
        # the holiday; the first one is used, giving the 182-day distance.
        first_occurrence = occurrences[0]
        return (index - first_occurrence).days

    return distance_to_day


# Holiday rules.  Rules with an ``offset`` start from the given month/day and
# are then shifted by that offset.

# Easter Sunday itself (the Easter offset plus zero days).
EasterSunday = Holiday("Easter Sunday", month=1, day=1, offset=[Easter(), Day(0)])
NewYearsDay = Holiday("New Years Day", month=1, day=1)
# First Sunday on or after February 1st.
SuperBowl = Holiday("Superbowl", month=2, day=1, offset=DateOffset(weekday=SU(1)))
# Second Sunday of May.
MothersDay = Holiday("Mothers Day", month=5, day=1, offset=DateOffset(weekday=SU(2)))
IndependenceDay = Holiday("Independence Day", month=7, day=4)
# Named "Christmas Eve" so it is distinguishable from ChristmasDay; both rules
# previously carried the name "Christmas".
ChristmasEve = Holiday("Christmas Eve", month=12, day=24)
ChristmasDay = Holiday("Christmas", month=12, day=25)
NewYearsEve = Holiday("New Years Eve", month=12, day=31)
# The day after the fourth Thursday of November.
BlackFriday = Holiday(
    "Black Friday", month=11, day=1, offset=[pd.DateOffset(weekday=TH(4)), Day(1)]
)
CyberMonday = Holiday(
    "Cyber Monday",
    month=11,
    day=1,
    offset=[pd.DateOffset(weekday=TH(4)), Day(4)],
コード例 #25
0
from .common_holidays import (
    christmas,
    christmas_eve,
    european_labour_day,
    new_years_day,
    new_years_eve,
)
from .exchange_calendar import HolidayCalendar, ExchangeCalendar

# Fixed-date holidays built by the shared helper functions.
NewYearsDay = new_years_day()
LabourDay = european_labour_day()

# pandas ships its own GoodFriday rule, but this calendar needs a custom
# start year, so the rule is rebuilt here: two days before Easter Sunday.
GoodFriday = Holiday(
    "Good Friday", month=1, day=1, offset=[Easter(), Day(-2)], start_date="2013"
)

# Fixed-date holidays observed on the same calendar day every year.
LiberationDay = Holiday("Liberation Day", month=5, day=8)
SaintsCyrilAndMethodiusDay = Holiday(
    "Saints Cyril and Methodius Day", month=7, day=5
)
コード例 #26
0
ファイル: test_ops.py プロジェクト: zeneli/pandas
class TestDatetimeIndexOps(Ops):
    def setup_method(self, method):
        """Split the inherited ``self.objs`` into datetime-like and other objects.

        ``is_valid_objs`` receives every ``DatetimeIndex``/``PeriodIndex``
        instance and ``not_valid_objs`` receives everything else.
        """
        super().setup_method(method)
        datetimelike = (DatetimeIndex, PeriodIndex)
        self.is_valid_objs = [obj for obj in self.objs if isinstance(obj, datetimelike)]
        self.not_valid_objs = [
            obj for obj in self.objs if not isinstance(obj, datetimelike)
        ]

    def test_ops_properties(self):
        """Run ``check_ops_properties`` for the field, object and bool op lists."""

        def is_datetime_index(obj):
            return isinstance(obj, DatetimeIndex)

        for ops_attr in ("_field_ops", "_object_ops", "_bool_ops"):
            self.check_ops_properties(
                getattr(DatetimeIndex, ops_attr), is_datetime_index
            )

    def test_ops_properties_basic(self):
        """Datetime fields are not Series attributes, but label access still works."""
        # sanity check that the behavior didn't change
        # GH#7206
        template = "'Series' object has no attribute '{}'"
        for field in ("year", "day", "second", "weekday"):
            with pytest.raises(AttributeError, match=template.format(field)):
                getattr(self.dt_series, field)

        # attribute access should still work!
        labelled = Series({"year": 2000, "month": 1, "day": 10})
        assert labelled.year == 2000
        assert labelled.month == 1
        assert labelled.day == 10

        # "weekday" is not one of the labels, so it must still raise.
        missing_msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=missing_msg):
            labelled.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """``repeat`` repeats each element in place and drops the frequency."""
        tz = tz_naive_fixture

        def check_repeat(index, reps, expected):
            # The method and the numpy function must agree with each other.
            for repeated in (index.repeat(reps), np.repeat(index, reps)):
                tm.assert_index_equal(repeated, expected)
                assert repeated.freq is None

        year_of_days = date_range("1/1/2000", "1/1/2001")
        repeated_year = year_of_days.repeat(5)
        assert repeated_year.freq is None
        assert len(repeated_year) == 5 * len(year_of_days)

        # Daily frequency.
        daily = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        daily_expected = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        )
        check_repeat(daily, 2, daily_expected)

        # Two-day frequency.
        two_daily = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        two_daily_expected = pd.DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        )
        check_repeat(two_daily, 2, two_daily_expected)

        # No frequency, with a missing value in the middle.
        with_nat = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        with_nat_expected = pd.DatetimeIndex(
            ["2001-01-01"] * 3 + ["NaT"] * 3 + ["2003-01-01"] * 3,
            tz=tz,
        )
        check_repeat(with_nat, 3, with_nat_expected)

    def test_repeat(self, tz_naive_fixture):
        """``repeat`` matches ``np.repeat`` and rejects the ``axis`` argument."""
        tz = tz_naive_fixture
        reps = 2

        half_hourly = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        # Each source timestamp appears ``reps`` times in a row.
        expected = DatetimeIndex(
            [
                Timestamp(stamp, tz=tz, freq="30T")
                for stamp in ("2016-01-01 00:00:00", "2016-01-01 00:30:00")
                for _ in range(reps)
            ]
        )

        repeated = half_hourly.repeat(reps)
        tm.assert_index_equal(repeated, expected)
        assert repeated.freq is None

        tm.assert_index_equal(np.repeat(half_hourly, reps), expected)

        axis_msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=axis_msg):
            np.repeat(half_hourly, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        """``resolution`` reports the expected unit for each frequency alias."""
        tz = tz_naive_fixture
        # (frequency alias, expected resolution)
        cases = [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ]
        for freq, expected in cases:
            index = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
            assert index.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` and ``unique`` agree for the index and its Series form."""
        tz = tz_naive_fixture
        # GH 7735
        hourly = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        repeat_counts = range(1, len(hourly) + 1)
        repeated = DatetimeIndex(np.repeat(hourly.values, repeat_counts), tz=tz)

        descending_hours = pd.date_range(
            "2011-01-01 18:00", freq="-1H", periods=10, tz=tz
        )
        expected_counts = Series(
            range(10, 0, -1), index=descending_hours, dtype="int64"
        )
        for obj in (repeated, Series(repeated)):
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        expected_unique = pd.date_range(
            "2011-01-01 09:00", freq="H", periods=10, tz=tz
        )
        tm.assert_index_equal(repeated.unique(), expected_unique)

        # Unsorted duplicates plus a missing value.
        with_nat = DatetimeIndex(
            ["2013-01-01 09:00"] * 3 + ["2013-01-01 08:00"] * 2 + [pd.NaT],
            tz=tz,
        )

        without_nat_index = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz
        )
        expected_counts = Series([3, 2], index=without_nat_index)
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        with_nat_index = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz
        )
        expected_counts = Series([3, 2, 1], index=with_nat_index)
        for obj in (with_nat, Series(with_nat)):
            tm.assert_series_equal(obj.value_counts(dropna=False), expected_counts)

        tm.assert_index_equal(with_nat.unique(), with_nat_index)

    def test_nonunique_contains(self):
        """Membership works for an element of an index holding duplicates."""
        # GH 9512
        duplicated_inputs = (
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ["2015", "2015", "2016"],
            ["2015", "2015", "2014"],
        )
        for values in duplicated_inputs:
            index = DatetimeIndex(values)
            assert index[0] in index

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        """Sorting an index that carries a freq keeps (or negates) that freq."""
        reversed_idx = idx[::-1]

        # Ascending: the index is already sorted, so nothing changes.
        ascending = idx.sort_values()
        tm.assert_index_equal(ascending, idx)
        assert ascending.freq == idx.freq

        # Descending: same as reversing, with a negative step frequency.
        descending = idx.sort_values(ascending=False)
        tm.assert_index_equal(descending, reversed_idx)
        assert descending.freq == reversed_idx.freq
        assert descending.freq.n == -1

        # Same two checks again, also validating the returned indexer.
        ascending, positions = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ascending, idx)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 1, 2]), check_dtype=False
        )
        assert ascending.freq == idx.freq

        descending, positions = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(descending, reversed_idx)
        tm.assert_numpy_array_equal(
            positions, np.array([2, 1, 0]), check_dtype=False
        )
        assert descending.freq == reversed_idx.freq
        assert descending.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            # Unsorted dates containing a duplicate.
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            # Same layout with the duplicated value replaced by NaT.
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        """Sorting an index without a freq orders the values and leaves freq unset.

        A previous, byte-identical copy of the first parametrized case was
        removed; it only ran the same assertions a second time.

        Both remaining cases share the same positional layout, so the expected
        indexers below apply to each of them.
        """
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        # Repeat both directions, also checking the indexer that is returned.
        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """``drop_duplicates`` keeps the freq of a unique index; a reset freq stays unset."""
        # GH 10115
        january = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        deduplicated = january.drop_duplicates()
        tm.assert_index_equal(january, deduplicated)
        assert january.freq == deduplicated.freq

        doubled = january.append(january)
        assert doubled.freq is None  # freq is reset
        deduplicated = doubled.drop_duplicates()
        tm.assert_index_equal(january, deduplicated)
        assert deduplicated.freq is None

    def test_drop_duplicates(self):
        """``drop_duplicates`` behaves the same for the index and its Series form."""
        # to check Index/Series compat
        january = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        # The first five days appear a second time at the end.
        with_dupes = january.append(january[:5])
        as_series = Series(with_dupes)

        # Default: keep the first occurrence.
        tm.assert_index_equal(with_dupes.drop_duplicates(), january)
        tm.assert_series_equal(as_series.drop_duplicates(), Series(january))

        # keep="last": the trailing copies survive.
        kept_last = january[5:].append(january[:5])
        tm.assert_index_equal(with_dupes.drop_duplicates(keep="last"), kept_last)
        tm.assert_series_equal(
            as_series.drop_duplicates(keep="last"),
            Series(kept_last, index=np.arange(5, 36)),
        )

        # keep=False: every duplicated value is removed entirely.
        tm.assert_index_equal(with_dupes.drop_duplicates(keep=False), january[5:])
        tm.assert_series_equal(
            as_series.drop_duplicates(keep=False),
            Series(january[5:], index=np.arange(5, 31)),
        )

    @pytest.mark.parametrize(
        "freq",
        [
            "A", "2A", "-2A",
            "Q", "-1Q",
            "M", "-1M",
            "D", "3D", "-3D",
            "W", "-1W",
            "H", "2H", "-2H",
            "T", "2T",
            "S", "-3S",
        ],
    )
    def test_infer_freq(self, freq):
        """``freq="infer"`` recovers the frequency from the raw integer values."""
        # GH 11018
        source = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
        inferred = pd.DatetimeIndex(source.asi8, freq="infer")
        tm.assert_index_equal(source, inferred)
        assert inferred.freq == freq

    def test_nat(self, tz_naive_fixture):
        """NaT is the NA value and the NaN bookkeeping attributes reflect it."""
        tz = tz_naive_fixture
        assert pd.DatetimeIndex._na_value is pd.NaT
        assert pd.DatetimeIndex([])._na_value is pd.NaT

        # (values, expected _isnan mask, expected hasnans, expected _nan_idxs)
        cases = [
            (["2011-01-01", "2011-01-02"], [False, False], False, []),
            (["2011-01-01", "NaT"], [False, True], True, [1]),
        ]
        for values, isnan_mask, has_nans, nan_positions in cases:
            index = pd.DatetimeIndex(values, tz=tz)
            assert index._can_hold_na

            tm.assert_numpy_array_equal(index._isnan, np.array(isnan_mask))
            assert index.hasnans is has_nans
            tm.assert_numpy_array_equal(
                index._nan_idxs, np.array(nan_positions, dtype=np.intp)
            )

    def test_equals(self):
        """``equals`` is true for same-valued indexes only, and never raises."""
        # GH 13107
        naive = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert naive.equals(naive)
        assert naive.equals(naive.copy())
        assert naive.equals(naive.astype(object))
        assert naive.astype(object).equals(naive)
        assert naive.astype(object).equals(naive.astype(object))
        assert not naive.equals(list(naive))
        assert not naive.equals(pd.Series(naive))

        def assert_differs_from_naive(other):
            # ``other`` must compare unequal in every supported representation.
            assert not naive.equals(other)
            assert not naive.equals(other.copy())
            assert not naive.equals(other.astype(object))
            assert not naive.astype(object).equals(other)
            assert not naive.equals(list(other))
            assert not naive.equals(pd.Series(other))

        pacific = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific"
        )
        assert_differs_from_naive(pacific)

        # same internal, different tz
        same_i8_pacific = pd.DatetimeIndex._simple_new(naive.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(naive.asi8, same_i8_pacific.asi8)
        assert_differs_from_naive(same_i8_pacific)

        candidates = (naive, pacific, same_i8_pacific)

        # check that we do not raise when comparing with OutOfBounds objects
        oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object)
        for candidate in candidates:
            assert not candidate.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        for candidate in candidates:
            assert not candidate.equals(oob2)

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """``freq`` accepts a compatible string or offset and can be cleared again."""
        # GH 20678
        index = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        index.freq = freq
        stored = index.freq
        assert stored == freq
        assert isinstance(index.freq, ABCDateOffset)

        # can reset to None
        index.freq = None
        assert index.freq is None

    def test_freq_setter_errors(self):
        """Assigning an incompatible or unparsable ``freq`` raises ``ValueError``."""
        # GH 20678
        every_other_day = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        mismatch_msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=mismatch_msg):
            every_other_day.freq = "5D"

        # setting with non-freq string
        invalid_msg = "Invalid frequency"
        with pytest.raises(ValueError, match=invalid_msg):
            every_other_day.freq = "foo"

    def test_offset_deprecated(self):
        """Reading or writing ``offset`` emits a ``FutureWarning``."""
        # GH 20716
        index = pd.DatetimeIndex(["20180101", "20180102"])
        expect_future_warning = tm.assert_produces_warning

        # getter deprecated
        with expect_future_warning(FutureWarning):
            index.offset

        # setter deprecated
        with expect_future_warning(FutureWarning):
            index.offset = BDay()
コード例 #27
0
ファイル: frequencies.py プロジェクト: quaintm/pandas

from pandas.tseries.offsets import (Nano, Micro, Milli, Second, Minute, Hour,
                                    Day, BDay, CDay, Week, MonthBegin,
                                    MonthEnd, BMonthBegin, BMonthEnd,
                                    QuarterBegin, QuarterEnd, BQuarterBegin,
                                    BQuarterEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd,
                                    )
# ``cday`` holds a default CDay offset, or stays None when constructing one
# raises NotImplementedError.
cday = None
try:
    cday = CDay()
except NotImplementedError:
    pass

_offset_map = {
    'D': Day(),
    'C': cday,
    'B': BDay(),
    'H': Hour(),
    'T': Minute(),
    'S': Second(),
    'L': Milli(),
    'U': Micro(),
    None: None,

    # Monthly - Calendar
    'M': MonthEnd(),
    'MS': MonthBegin(),

    # Monthly - Business
    'BM': BMonthEnd(),
コード例 #28
0
# coding: utf-8

import pandas as pd
from time import time
from pandas.tseries.offsets import Day
# Offset of seven days.
day7 = Day(7)


def load_data(path):
    """Load the event log at ``path`` into the module-level ``data`` frame.

    Columns 1-5 of the CSV are read, ``event_time`` becomes the index (pandas
    is asked to parse it as dates via ``parse_dates=True``) and the rows are
    sorted by that index.  ``SHOT_RECORDED`` and ``APP_CLOSED`` rows are then
    removed and the ``event_name`` column is dropped.

    Nothing is returned; the result is stored in the global ``data``.
    ``data_all`` is declared global and ``start`` records the start time, but
    neither is used further within this function.
    """
    global data, data_all

    start = time()

    data = pd.read_csv(
        path,
        usecols=[1, 2, 3, 4, 5],
        index_col='event_time',
        parse_dates=True,
    )
    data = data.sort_index()

    # Events that are filtered out, in this order:
    # - 'SHOT_RECORDED' may not be informative: treating it like an "open"
    #   event cannot guarantee that the user actually played.
    # - 'APP_CLOSED' may not be informative for this task either: it cannot be
    #   treated as an "open" a few minutes earlier that was simply not
    #   recorded, because the app may have been opened more than 7 days ago
    #   (apps can stay in the background for weeks).
    for dropped_event in ('SHOT_RECORDED', 'APP_CLOSED'):
        data = data[data['event_name'] != dropped_event]

    # Only the 'APP_OPEN' category is expected to remain, so the column
    # carries no information any more.
    data = data.drop(columns='event_name')
コード例 #29
0
# ``cday`` holds a default CDay offset, or stays None when constructing one
# raises NotImplementedError.
cday = None
try:
    cday = CDay()
except NotImplementedError:
    pass

#: cache of previously seen offsets
_offset_map = dict()


def get_period_alias(offset_str):
    """
    Return the period alias mapped to an offset alias (e.g. BQ -> Q).

    The lookup is done in ``_offset_to_period_map``; ``None`` is returned when
    ``offset_str`` has no entry there.
    """
    period_alias = _offset_to_period_map.get(offset_str)
    return period_alias


# Maps a plural unit name to the offset representing one of that unit.
_name_to_offset_map = dict(
    days=Day(1),
    hours=Hour(1),
    minutes=Minute(1),
    seconds=Second(1),
    milliseconds=Milli(1),
    microseconds=Micro(1),
    nanoseconds=Nano(1),
)


@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq')
def to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object
コード例 #30
0
def create_data():
    """
    Build the collection of pandas objects that gets pickled.

    Returns a dict keyed by object family (``series``, ``frame``, ``index``,
    ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp``,
    ``offsets``); each value is itself a dict mapping a short label to a
    sample object of that family.
    """
    # Raw columns shared by several of the Series/DataFrame samples below.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M"),
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
        "range": RangeIndex(10),
    }

    # interval_range is only imported for versions 0.21 and later.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    reg2_first = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
    reg2_second = ["one", "two", "one", "two", "one", "two", "one", "two"]
    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(zip(reg2_first, reg2_second)),
            names=["first", "second"],
        )
    }

    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(
            np.arange(10).astype(np.int64),
            index=date_range("20130101", periods=10),
        ),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip([1, 1, 2, 2, 2], [3, 4, 3, 4, 5])),
                names=["one", "two"],
            ),
        ),
        "dup": Series(
            np.arange(5).astype(np.float64),
            index=["A", "B", "C", "D", "A"],
        ),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }

    # Same content as ``data`` but with a duplicated column label ("A").
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame = {
        "float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
        "int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
        "mixed": DataFrame({key: data[key] for key in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64),
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    )
                ),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64),
            columns=["A", "B", "A"],
        ),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame(
            {
                "A": Categorical(["foo", "bar", "baz"]),
                "B": np.arange(3).astype(np.int64),
            }
        ),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    # Labels name the integer width that the category codes are meant to use.
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
        "freq": Timestamp("2011-01-01", freq="D"),
        "both": Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M"),
    }

    offsets = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {"float": _create_sp_series(), "ts": _create_sp_tsseries()},
        "sp_frame": {"float": _create_sp_frame()},
        "cat": cat,
        "timestamp": timestamp,
        "offsets": offsets,
    }