Example #1
0
class TestDatelike(object):
    """Exercise ``cut`` and ``qcut`` on datetime, datetimetz and timedelta data."""

    @pytest.mark.parametrize('s', [
        Series(DatetimeIndex(['20180101', NaT, '20180103'])),
        Series(TimedeltaIndex(['0 days', NaT, '2 days']))
    ],
                             ids=lambda x: str(x.dtype))
    def test_qcut_nat(self, s):
        """A NaT element is expected to map to a missing interval (GH 19768)."""
        # GH 19768
        intervals = IntervalIndex.from_tuples([(s[0] - Nano(), s[2] - Day()),
                                               np.nan, (s[2] - Day(), s[2])])
        expected = Series(Categorical(intervals, ordered=True))
        result = qcut(s, 2)
        tm.assert_series_equal(result, expected)

    def test_datetime_cut(self):
        """``cut`` into 3 bins gives the same intervals for every container.

        The same three dates are passed as a Series, a list, an ndarray and a
        DatetimeIndex (GH 14714).
        """
        # GH 14714
        # testing for time data to be present as series
        data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03']))

        # retbins=True is kept so that code path is exercised; the returned
        # bin edges themselves are not inspected by this test.
        result, _ = cut(data, 3, retbins=True)
        expected = (Series(
            IntervalIndex([
                Interval(Timestamp('2012-12-31 23:57:07.200000'),
                         Timestamp('2013-01-01 16:00:00')),
                Interval(Timestamp('2013-01-01 16:00:00'),
                         Timestamp('2013-01-02 08:00:00')),
                Interval(Timestamp('2013-01-02 08:00:00'),
                         Timestamp('2013-01-03 00:00:00'))
            ])).astype(CDT(ordered=True)))

        tm.assert_series_equal(result, expected)

        # testing for time data to be present as list
        data = [
            np.datetime64('2013-01-01'),
            np.datetime64('2013-01-02'),
            np.datetime64('2013-01-03')
        ]
        result, _ = cut(data, 3, retbins=True)
        tm.assert_series_equal(Series(result), expected)

        # testing for time data to be present as ndarray
        data = np.array([
            np.datetime64('2013-01-01'),
            np.datetime64('2013-01-02'),
            np.datetime64('2013-01-03')
        ])
        result, _ = cut(data, 3, retbins=True)
        tm.assert_series_equal(Series(result), expected)

        # testing for time data to be present as datetime index
        data = DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03'])
        result, _ = cut(data, 3, retbins=True)
        tm.assert_series_equal(Series(result), expected)

    @pytest.mark.parametrize('bins', [
        3,
        [
            Timestamp('2013-01-01 04:57:07.200000'),
            Timestamp('2013-01-01 21:00:00'),
            Timestamp('2013-01-02 13:00:00'),
            Timestamp('2013-01-03 05:00:00')
        ]
    ])
    @pytest.mark.parametrize('box', [list, np.array, Index, Series])
    def test_datetimetz_cut(self, bins, box):
        """``cut`` on tz-aware data with an integer or explicit bins (GH 19872).

        Explicit bin edges are wrapped in ``box`` so each supported container
        type is tried; an integer bin count is passed through untouched.
        """
        # GH 19872
        tz = 'US/Eastern'
        s = Series(date_range('20130101', periods=3, tz=tz))
        if not isinstance(bins, int):
            bins = box(bins)
        result = cut(s, bins)
        expected = (Series(
            IntervalIndex([
                Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz),
                         Timestamp('2013-01-01 16:00:00', tz=tz)),
                Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
                         Timestamp('2013-01-02 08:00:00', tz=tz)),
                Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
                         Timestamp('2013-01-03 00:00:00', tz=tz))
            ])).astype(CDT(ordered=True)))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize('bins', [3, np.linspace(0, 1, 4)])
    def test_datetimetz_qcut(self, bins):
        """``qcut`` on tz-aware data with a count or explicit quantiles."""
        # GH 19872
        tz = 'US/Eastern'
        s = Series(date_range('20130101', periods=3, tz=tz))
        result = qcut(s, bins)
        expected = (Series(
            IntervalIndex([
                Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz),
                         Timestamp('2013-01-01 16:00:00', tz=tz)),
                Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
                         Timestamp('2013-01-02 08:00:00', tz=tz)),
                Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
                         Timestamp('2013-01-03 00:00:00', tz=tz))
            ])).astype(CDT(ordered=True)))
        tm.assert_series_equal(result, expected)

    def test_datetime_bin(self):
        """``cut`` accepts datetime bin edges given in several forms.

        The edges are supplied as Timestamp, ``to_datetime`` scalars,
        ``np.datetime64``, ``datetime.datetime`` and finally as a
        DatetimeIndex; each form must produce the same two intervals.
        """
        data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
        bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
        expected = (Series(
            IntervalIndex([
                Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
                Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))
            ])).astype(CDT(ordered=True)))

        # The converter list used to contain Timestamp twice; the duplicate
        # is replaced by to_datetime so each entry is a distinct conversion.
        for conv in [Timestamp, to_datetime, np.datetime64]:
            bins = [conv(v) for v in bin_data]
            result = cut(data, bins=bins)
            tm.assert_series_equal(Series(result), expected)

        bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
        result = cut(data, bins=bin_pydatetime)
        tm.assert_series_equal(Series(result), expected)

        # Pass the DatetimeIndex itself; previously bin_pydatetime was passed
        # a second time, leaving the DatetimeIndex case untested.
        bins = to_datetime(bin_data)
        result = cut(data, bins=bins)
        tm.assert_series_equal(Series(result), expected)

    def test_datetime_nan(self):
        """Integer bins on datetime data raise; out-of-range values are NaN."""
        with pytest.raises(ValueError):
            cut(date_range('20130101', periods=3), bins=[0, 2, 4])

        result = cut(date_range('20130102', periods=5),
                     bins=date_range('20130101', periods=2))
        # The single resulting category is a real interval, not a missing one.
        mask = result.categories.isna()
        tm.assert_numpy_array_equal(mask, np.array([False]))
        # Only the first value falls inside the one bin.
        mask = result.isna()
        tm.assert_numpy_array_equal(mask,
                                    np.array([False, True, True, True, True]))
Example #2
0
 def test_int64_nocopy(self):
     """int64 input with ``copy=False`` must be reused, not copied (GH#23539)."""
     # GH#23539 check that a copy isn't made when we pass int64 data
     #  and copy=False
     source = np.arange(10, dtype=np.int64)
     index = TimedeltaIndex(source, copy=False)
     backing = index._data._data
     # ``base`` pointing at ``source`` means the stored array is a view of it.
     assert backing.base is source
Example #3
0
    def test_order(self):
        """Check ``TimedeltaIndex.sort_values`` for values, indexer and freq.

        The first half uses already-sorted, regularly spaced indexes and
        checks that the frequency is kept (and negated when descending). The
        second half uses unsorted indexes with duplicates and checks the
        sorted values, the returned indexer and that no frequency is set.
        """
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"],
                              freq="D",
                              name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"],
                              freq="H",
                              name="idx")

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer,
                                        np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            # ``expected`` is still the reversed index built above.
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1")
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1")

        idx2 = TimedeltaIndex(["1 day", "3 day", "5 day", "2 day", "1 day"],
                              name="idx2")
        exp2 = TimedeltaIndex(["1 day", "1 day", "2 day", "3 day", "5 day"],
                              name="idx2")

        # idx2 used to be built but never checked, and the loop repeated the
        # (idx1, exp1) pair three times. Both inputs follow the same
        # 1, 3, 5, 2, 1 pattern, so the indexer expectations below apply to
        # each of them.
        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None
Example #4
0
    def test_round(self):
        """Check ``round`` on Timedelta scalars and on TimedeltaIndex.

        For each frequency both a positive and a negative input are rounded
        and compared with the expected value; frequencies that cannot be
        used for rounding must raise ValueError.
        """

        t1 = Timedelta('1 days 02:34:56.789123456')
        t2 = Timedelta('-1 days 02:34:56.789123456')

        for (freq, s1, s2) in [
            ('N', t1, t2),
            ('U', Timedelta('1 days 02:34:56.789123000'),
             Timedelta('-1 days 02:34:56.789123000')),
            ('L', Timedelta('1 days 02:34:56.789000000'),
             Timedelta('-1 days 02:34:56.789000000')),
            ('S', Timedelta('1 days 02:34:57'), Timedelta('-1 days 02:34:57')),
            ('2S', Timedelta('1 days 02:34:56'),
             Timedelta('-1 days 02:34:56')),
            ('5S', Timedelta('1 days 02:34:55'),
             Timedelta('-1 days 02:34:55')),
            ('T', Timedelta('1 days 02:35:00'), Timedelta('-1 days 02:35:00')),
            ('12T', Timedelta('1 days 02:36:00'),
             Timedelta('-1 days 02:36:00')),
            ('H', Timedelta('1 days 03:00:00'), Timedelta('-1 days 03:00:00')),
            ('d', Timedelta('1 days'), Timedelta('-1 days'))
        ]:
            r1 = t1.round(freq)
            assert r1 == s1
            r2 = t2.round(freq)
            assert r2 == s2

        # invalid
        for freq in ['Y', 'M', 'foobar']:
            with pytest.raises(ValueError):
                t1.round(freq)

        t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us')
        t2 = -1 * t1
        t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s')
        t1c = pd.TimedeltaIndex([1, 1, 1], unit='D')

        # note that negative times round DOWN! so don't give whole numbers
        for (freq, s1, s2) in [
            ('N', t1, t2), ('U', t1, t2),
            ('L', t1a,
             TimedeltaIndex([
                 '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56'
             ],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('S', t1a,
             TimedeltaIndex([
                 '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56'
             ],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('12T', t1c,
             TimedeltaIndex(['-1 days', '-1 days', '-1 days'],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('H', t1c,
             TimedeltaIndex(['-1 days', '-1 days', '-1 days'],
                            dtype='timedelta64[ns]',
                            freq=None)),
            ('d', t1c, pd.TimedeltaIndex([-1, -1, -1], unit='D'))
        ]:

            r1 = t1.round(freq)
            tm.assert_index_equal(r1, s1)
            r2 = t2.round(freq)
            # This check belongs inside the loop: when it sat one level out,
            # only the last frequency's negative result was ever compared.
            tm.assert_index_equal(r2, s2)

        # invalid
        for freq in ['Y', 'M', 'foobar']:
            with pytest.raises(ValueError):
                t1.round(freq)
Example #5
0
    def test_value_counts_datetime64(self, klass):
        """``value_counts``/``unique``/``nunique`` on datetime and timedelta data.

        ``klass`` is the container the parsed date column is wrapped in; the
        assertions branch on whether the wrapped object is an ``Index``.
        """

        # GH 3002, datetime64[ns]
        # don't test names though
        rows = [
            'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
            'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
        ]
        df = pd.read_fwf(StringIO("\n".join(rows)),
                         widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        dates = klass(df['dt'].copy())
        dates.name = None
        counts_index = pd.to_datetime([
            '2010-01-01 00:00:00', '2008-09-09 00:00:00', '2009-01-01 00:00:00'
        ])
        expected_counts = Series([3, 2, 1], index=counts_index)
        tm.assert_series_equal(dates.value_counts(), expected_counts)

        expected_unique = np_array_datetime64_compat(
            [
                '2010-01-01 00:00:00', '2009-01-01 00:00:00',
                '2008-09-09 00:00:00'
            ],
            dtype='datetime64[ns]')
        if not isinstance(dates, Index):
            tm.assert_numpy_array_equal(dates.unique(), expected_unique)
        else:
            tm.assert_index_equal(dates.unique(),
                                  DatetimeIndex(expected_unique))

        assert dates.nunique() == 3

        # with NaT
        raw_values = df['dt'].copy().values
        dates = klass(list(raw_values) + [pd.NaT])

        counts = dates.value_counts()
        assert counts.index.dtype == 'datetime64[ns]'
        tm.assert_series_equal(counts, expected_counts)

        # With dropna=False the NaT gets its own row, so the expectation is
        # extended in place before comparing.
        counts = dates.value_counts(dropna=False)
        expected_counts[pd.NaT] = 1
        tm.assert_series_equal(counts, expected_counts)

        uniques = dates.unique()
        assert uniques.dtype == 'datetime64[ns]'

        # numpy_array_equal cannot compare pd.NaT
        if not isinstance(dates, Index):
            tm.assert_numpy_array_equal(uniques[:3], expected_unique)
            assert pd.isna(uniques[3])
        else:
            exp_idx = DatetimeIndex(expected_unique.tolist() + [pd.NaT])
            tm.assert_index_equal(uniques, exp_idx)

        assert dates.nunique() == 3
        assert dates.nunique(dropna=False) == 4

        # timedelta64[ns]
        deltas = klass(df.dt - df.dt + timedelta(1), name='dt')

        delta_counts = Series([6], index=[Timedelta('1day')], name='dt')
        tm.assert_series_equal(deltas.value_counts(), delta_counts)

        one_day_index = TimedeltaIndex(['1 days'], name='dt')
        if not isinstance(deltas, Index):
            tm.assert_numpy_array_equal(deltas.unique(), one_day_index.values)
        else:
            tm.assert_index_equal(deltas.unique(), one_day_index)

        # Same sum with the operands the other way round.
        flipped = klass(timedelta(1) + (df.dt - df.dt), name='dt')
        tm.assert_series_equal(flipped.value_counts(), delta_counts)
Example #6
0
 def test_verify_integrity_deprecated(self):
     """Passing ``verify_integrity`` must emit a FutureWarning (GH#23919)."""
     # GH#23919
     expect_future_warning = tm.assert_produces_warning(FutureWarning)
     with expect_future_warning:
         TimedeltaIndex(['1 Day'], verify_integrity=False)
Example #7
0
    def test_value_counts_inferred(self):
        """``value_counts``/``unique``/``nunique`` for Index and Series.

        For each container class the test walks through string data, binned
        numeric data, data with missing values, empty data, datetime64 data
        (with and without NaT) and timedelta64 data.
        """
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            hist = s.value_counts(sort=False)
            hist.sort()
            expected = Series([3, 1, 4, 2], index=list('acbd'))
            expected.sort()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins: binning the string data is expected to raise TypeError
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            res4 = s1.value_counts(bins=4)
            exp4 = Series({
                0.998: 2,
                1.5: 1,
                2.0: 0,
                2.5: 1
            },
                          index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series({
                0.998: 0.5,
                1.5: 0.25,
                2.0: 0.0,
                2.5: 0.25
            },
                           index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(
                s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            # empty input
            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(), expected)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy(), name='dt')

            # The last literal used to end in 'X' rather than 'Z'; it now
            # matches its siblings and the expected array built below.
            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00Z'
            ])
            expected_s = Series([3, 2, 1], index=idx, name='dt')
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np.array([
                '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                '2008-09-09 00:00:00Z'
            ],
                                dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass([v for v in s.values] + [pd.NaT], name='dt')

            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # With dropna=False the NaT gets its own row, so the expectation
            # is extended in place before comparing.
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')
            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            self.assertTrue(unique[3] is pd.NaT
                            or unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            # Same sum with the operands the other way round.
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
Example #8
0
    def test_shift(self, datetime_series):
        """Round-trip ``Series.shift`` with and without a frequency.

        Covers positional shifts, frequency-based shifts on the fixture
        series and on a period series, integer ``periods`` taken from int32
        and int64 data, and subtraction of tz-aware shifted series.
        """
        forward = datetime_series.shift(1)
        back = forward.shift(-1)

        tm.assert_index_equal(forward.index, datetime_series.index)
        tm.assert_index_equal(back.index, datetime_series.index)
        tm.assert_numpy_array_equal(back.dropna().values,
                                    datetime_series.values[:-1])

        # Shifting by an offset object and back again restores the series.
        bday = BDay()
        round_trip = datetime_series.shift(1, freq=bday).shift(-1, freq=bday)
        tm.assert_series_equal(round_trip, datetime_series)

        tm.assert_series_equal(datetime_series.shift(0, freq=bday),
                               datetime_series)

        # Same round trip with the frequency given as a string alias.
        round_trip = datetime_series.shift(1, freq="B").shift(-1, freq="B")
        tm.assert_series_equal(round_trip, datetime_series)

        # corner case
        tm.assert_series_equal(datetime_series.shift(0), datetime_series)

        # Shifting with PeriodIndex
        periods = tm.makePeriodSeries()
        forward = periods.shift(1)
        back = forward.shift(-1)
        tm.assert_index_equal(forward.index, periods.index)
        tm.assert_index_equal(back.index, periods.index)
        tm.assert_numpy_array_equal(back.dropna().values,
                                    periods.values[:-1])

        by_alias = periods.shift(1, "B")
        by_offset = periods.shift(1, BDay())
        tm.assert_series_equal(by_alias, by_offset)
        tm.assert_series_equal(periods, by_alias.shift(-1, "B"))

        msg = "Given freq D does not match PeriodIndex freq B"
        with pytest.raises(ValueError, match=msg):
            periods.shift(freq="D")

        # legacy support
        kw_alias = periods.shift(1, freq="B")
        tm.assert_series_equal(by_alias, kw_alias)

        kw_offset = periods.shift(1, freq=BDay())
        tm.assert_series_equal(kw_offset, kw_alias)

        # 32-bit taking
        # GH#8129
        dates = date_range("2000-01-01", periods=5)
        for int_dtype in ("int32", "int64"):
            numbers = Series(np.arange(5, dtype=int_dtype), index=dates)
            # The shift amount is itself an element of the series, so it
            # carries the dtype under test.
            step = numbers.iloc[1]
            shifted = numbers.shift(periods=step)
            wanted = Series([np.nan, 0, 1, 2, 3], index=dates)
            tm.assert_series_equal(shifted, wanted)

        # GH#8260
        # with tz
        eastern = Series(date_range("2000-01-01 09:00:00",
                                    periods=5,
                                    tz="US/Eastern"),
                         name="foo")
        diffs = eastern - eastern.shift()

        wanted = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(diffs, wanted)

        # incompat tz
        cet = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"),
                     name="foo")
        msg = "DatetimeArray subtraction must have the same timezones or no timezones"
        with pytest.raises(TypeError, match=msg):
            eastern - cet
Example #9
0
    def test_dt_namespace_accessor(self):
        """Check the ``Series.dt`` accessor for datetime, timedelta and period data.

        For each kind of data every listed property is compared with the
        same attribute on an ``Index`` built from the values, every listed
        method is looked up on the accessor, and a few methods are checked
        in detail. The test also checks the names exposed by
        ``s.dt.__dir__()`` and that assigning through ``.dt`` raises.
        """

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
            "isocalendar",
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            """Build the expected value of ``s.dt.<name>`` from an Index."""
            # Use the ``name`` argument; this helper previously read the
            # enclosing loop variable ``prop`` and ignored its parameter.
            result = getattr(Index(s._values), name)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result) or isinstance(result, DataFrame):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            """Assert that ``s.dt.<name>`` matches ``get_expected(s, name)``."""
            a = getattr(s.dt, name)
            b = get_expected(s, name)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            elif isinstance(a, DataFrame):
                tm.assert_frame_equal(a, b)
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"),
                   name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                # we ignore week and weekofyear because they are deprecated
                if prop not in ["freq", "week", "weekofyear"]:
                    compare(s, prop)

            # Only checks that each method name resolves on the accessor.
            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (DatetimeIndex(
                s.values).tz_localize("UTC").tz_convert("US/Eastern"))
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"),
                   name="xxx")
        for prop in ok_for_dt:

            # we test freq below
            # we ignore week and weekofyear because they are deprecated
            if prop not in ["freq", "week", "weekofyear"]:
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"),
                          index=s.index,
                          name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(timedelta_range("1 day", periods=5),
                   index=list("abcde"),
                   name="xxx"),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"),
                   name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5,
                                freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"),
                   index=index,
                   name="xxx")
        exp = Series(np.array([2014, 2014, 2014], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"),
                     index=index,
                     name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range("20130101", periods=5, freq="D"), name="xxx")
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            """Return the sorted public names listed by ``s.dt.__dir__()``."""
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))

        s = Series(
            period_range("20130101", periods=5, freq="D",
                         name="xxx").astype(object))
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_period + ok_for_period_methods)))

        # 11295
        # ambiguous time error on the conversions
        s = Series(date_range("2015-01-01", "2016-01-01", freq="T"),
                   name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(results,
                               sorted(set(ok_for_dt + ok_for_dt_methods)))
        exp_values = date_range("2015-01-01", "2016-01-01", freq="T",
                                tz="UTC").tz_convert("America/Chicago")
        # freq not preserved by tz_localize above
        exp_values = exp_values._with_freq(None)
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        msg = "modifications to a property of a datetimelike.+not supported"
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError, match=msg):
                s.dt.hour[0] = 5
Example #10
0
 def test_argmin_argmax(self):
     idx = TimedeltaIndex(
         ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"])
     assert idx.argmin() == 1
     assert idx.argmax() == 0
Example #11
0
def ccf(x,
        y,
        lags=365,
        bin_method='rectangle',
        bin_width=0.5,
        max_gap=inf,
        min_obs=100,
        full_output=False,
        alpha=0.05):
    """Compute the cross-correlation function for irregular time series.

    Parameters
    ----------
    x, y: pandas.Series
        Series holding the values to cross-correlate. The index is expected
        to be a pandas.DatetimeIndex.
    lags: int, list or array_like, optional
        Lags in days for which the cross-correlation is calculated. An int
        (default 365) is expanded to the lags 1, 2, ..., ``lags``; when the
        "regular" method is active it is expanded to multiples of the
        integer mean time step up to ``lags`` instead. A list is converted
        to a float array; any other value is used as given.
    bin_method: str, optional
        Method to determine the type of bin. Options are "rectangle"
        (default) and "gaussian". When the indices of both ``x`` and ``y``
        have an inferred frequency, the method is switched to "regular"
        regardless of the value passed.
    bin_width: float or None, optional
        Number of days used as the width of the bin. Default is 0.5. When
        None, the width is derived from the mean time step (dt_mu):
        0.5 * dt_mu for "rectangle" and 0.25 * dt_mu for "gaussian". Not
        used by the "regular" method.
    max_gap: float, optional
        Maximum timestep gap in the data. All timesteps above this gap value
        are not used for calculating the average timestep. This can be
        helpful when there is a large gap in the data that influences the
        average timestep.
    min_obs: int, optional
        Only lags whose bin holds more than ``min_obs`` observations are
        kept in the result.
    full_output: bool, optional
        If True, a DataFrame with the columns "ccf", "stderr" and "n" is
        returned; otherwise only the "ccf" column. Default is False.
    alpha: float
        alpha level to compute the confidence interval (e.g., 1-alpha).

    Returns
    -------
    c: pandas.Series or pandas.DataFrame
        The Cross-correlation function, indexed by the lags as a
        TimedeltaIndex named "Lags".

    Raises
    ------
    Warning
        If ``bin_method="regular"`` is requested but the series do not both
        have an inferred frequency.
    NotImplementedError
        If ``bin_method`` is not one of the supported methods.

    References
    ----------
    Rehfeld, K., Marwan, N., Heitzig, J., Kurths, J. (2011). Comparison
    of correlation analysis techniques for irregularly sampled time series.
    Nonlinear Processes in Geophysics. 18. 389-404. 10.5194 pg-18-389-2011.

    Tip
    ---
    This method will be significantly faster when Numba is installed. Check
    out the [Numba project here](https://numba.pydata.org)

    Examples
    --------
    >>> ccf = ps.stats.ccf(x, y, bin_method="gaussian")

    """
    # Regularly sampled series always use the dedicated "regular" method.
    both_regular = x.index.inferred_freq and y.index.inferred_freq
    if both_regular:
        bin_method = "regular"
    elif bin_method == "regular":
        raise Warning("time series does not have regular time steps, "
                      "choose different bin_method")

    x, t_x, dt_x_mu = _preprocess(x, max_gap=max_gap)
    y, t_y, dt_y_mu = _preprocess(y, max_gap=max_gap)
    # Mean time step taken as the larger of the two series' values.
    dt_mu = max(dt_x_mu, dt_y_mu)

    # Normalise ``lags`` to a float array where it is an int or a list.
    if isinstance(lags, int):
        if bin_method == "regular":
            step = int(dt_mu)
            lags = arange(step, lags + 1, step, dtype=float)
        else:
            lags = arange(1.0, lags + 1, dtype=float)
    elif isinstance(lags, list):
        lags = array(lags, dtype=float)

    # c holds the correlations, b the per-lag counts used for "n".
    if bin_method == "rectangle":
        width = 0.5 * dt_mu if bin_width is None else bin_width
        c, b = _compute_ccf_rectangle(lags, t_x, x, t_y, y, width)
    elif bin_method == "gaussian":
        width = 0.25 * dt_mu if bin_width is None else bin_width
        c, b = _compute_ccf_gaussian(lags, t_x, x, t_y, y, width)
    elif bin_method == "regular":
        c, b = _compute_ccf_regular(arange(1.0, len(lags) + 1), x, y)
    else:
        raise NotImplementedError

    std = norm.ppf(1 - alpha / 2.) / sqrt(b)
    lag_index = TimedeltaIndex(lags, unit="D", name="Lags")
    result = DataFrame(data={"ccf": c, "stderr": std, "n": b},
                       index=lag_index)

    # Drop every lag that does not have more than ``min_obs`` observations.
    result = result.where(result.n > min_obs).dropna()

    return result if full_output else result.ccf
Example #12
0
 def test_searchsorted_invalid_argument_dtype(self, arg):
     """``searchsorted`` raises TypeError for an incompatible ``arg``."""
     tdi = TimedeltaIndex(["1 day", "2 days", "3 days"])
     with pytest.raises(TypeError,
                        match="searchsorted requires compatible dtype"):
         tdi.searchsorted(arg)
Example #13
0
 def test_constructor_wrong_precision_raises(self):
     """Requesting a ``timedelta64[us]`` dtype raises ValueError."""
     pattern = r"dtype timedelta64\[us\] cannot be converted to timedelta64\[ns\]"
     with pytest.raises(ValueError, match=pattern):
         TimedeltaIndex(["2000"], dtype="timedelta64[us]")
Example #14
0
class TestTimedeltaIndex:
    """Union and intersection tests for ``TimedeltaIndex``."""

    def test_union(self):
        """Two overlapping ranges union into one contiguous range."""
        first = timedelta_range("1day", periods=5)
        second = timedelta_range("3day", periods=5)
        combined = first.union(second)
        tm.assert_index_equal(combined, timedelta_range("1day", periods=7))

        # Mixed integer/timedelta unions are only called; results unchecked.
        ints = Int64Index(np.arange(0, 20, 2))
        tds = timedelta_range(start="1 day", periods=10, freq="D")
        ints.union(tds)  # Works
        tds.union(ints)  # Fails with "AttributeError: can't set attribute"

    def test_union_coverage(self):
        """Union with a subset or from an empty slice returns the ordered index."""
        unordered = TimedeltaIndex(["3d", "1d", "2d"])
        ordered = TimedeltaIndex(unordered.sort_values(), freq="infer")
        tm.assert_index_equal(ordered.union(unordered), ordered)

        from_empty = ordered[:0].union(ordered)
        tm.assert_index_equal(from_empty, ordered)
        assert from_empty.freq == ordered.freq

    def test_union_bug_1730(self):
        """Union of ranges with different frequencies is the sorted set union."""
        every_3h = timedelta_range("1 day", periods=4, freq="3H")
        every_4h = timedelta_range("1 day", periods=4, freq="4H")

        combined = every_3h.union(every_4h)
        wanted = TimedeltaIndex(sorted(set(every_3h) | set(every_4h)))
        tm.assert_index_equal(combined, wanted)

    def test_union_bug_1745(self):
        """Union of a one-element index with an unsorted index is sorted."""
        single = TimedeltaIndex(["1 day 15:19:49.695000"])
        several = TimedeltaIndex([
            "2 day 13:04:21.322000",
            "1 day 15:27:24.873000",
            "1 day 15:31:05.350000",
        ])

        combined = single.union(several)
        wanted = TimedeltaIndex(sorted(set(single) | set(several)))
        tm.assert_index_equal(combined, wanted)

    def test_union_bug_4564(self):
        """Union of a range with itself shifted by 15 minutes."""
        base = timedelta_range("1 day", "30d")
        shifted = base + pd.offsets.Minute(15)

        combined = base.union(shifted)
        wanted = TimedeltaIndex(sorted(set(base) | set(shifted)))
        tm.assert_index_equal(combined, wanted)

    def test_intersection_bug_1708(self):
        """``&`` of hourly ranges: disjoint is empty, overlap is the shared part."""
        hourly = timedelta_range("1 day", periods=4, freq="h")
        disjoint = hourly + pd.offsets.Hour(5)
        assert len(hourly & disjoint) == 0

        hourly = timedelta_range("1 day", periods=4, freq="h")
        overlapping = hourly + pd.offsets.Hour(1)
        common = hourly & overlapping
        tm.assert_index_equal(
            common, timedelta_range("1 day 01:00:00", periods=3, freq="h"))

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection_equal(self, sort):
        """Intersecting equal indices returns the original index."""
        # GH 24471 Test intersection outcome given the sort keyword
        # for equal indices intersection should return the original index
        first = timedelta_range("1 day", periods=4, freq="h")
        second = timedelta_range("1 day", periods=4, freq="h")
        common = first.intersection(second, sort=sort)
        if sort is None:
            tm.assert_index_equal(common, second.sort_values())
        assert tm.equalContents(common, second)

        # Corner case: intersecting with itself hands back the same object.
        with_self = first.intersection(first, sort=sort)
        assert with_self is first

    @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)])
    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection_zero_length(self, period_1, period_2, sort):
        """Intersecting with an empty range gives an empty hourly range."""
        # GH 24471 test for non overlap the intersection should be zero length
        left = timedelta_range("1 day", periods=period_1, freq="h")
        right = timedelta_range("1 day", periods=period_2, freq="h")
        empty = timedelta_range("1 day", periods=0, freq="h")
        tm.assert_index_equal(left.intersection(right, sort=sort), empty)

    @pytest.mark.parametrize("sort", [None, False])
    def test_zero_length_input_index(self, sort):
        """A zero-length intersection is a copy, not one of the inputs."""
        # GH 24966 test for 0-len intersections are copied
        empty = timedelta_range("1 day", periods=0, freq="h")
        filled = timedelta_range("1 day", periods=3, freq="h")
        common = empty.intersection(filled, sort=sort)
        assert empty is not common
        assert filled is not common
        tm.assert_copy(common, empty)

    @pytest.mark.parametrize(
        "rng, expected",
        # if target has the same name, it is preserved
        [
            (
                timedelta_range("1 day", periods=5, freq="h", name="idx"),
                timedelta_range("1 day", periods=4, freq="h", name="idx"),
            ),
            # if target name is different, it will be reset
            (
                timedelta_range("1 day", periods=5, freq="h", name="other"),
                timedelta_range("1 day", periods=4, freq="h", name=None),
            ),
            # if no overlap exists return empty index
            (
                timedelta_range("1 day", periods=10, freq="h", name="idx")[5:],
                TimedeltaIndex([], name="idx"),
            ),
        ],
    )
    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, rng, expected, sort):
        """Intersection of an hourly base with ``rng``: values, name and freq."""
        # GH 4690 (with tz)
        base = timedelta_range("1 day", periods=4, freq="h", name="idx")
        common = base.intersection(rng, sort=sort)
        wanted = expected.sort_values() if sort is None else expected
        tm.assert_index_equal(common, wanted)
        assert common.name == wanted.name
        assert common.freq == wanted.freq

    @pytest.mark.parametrize(
        "rng, expected",
        # part intersection works
        [
            (
                TimedeltaIndex(["5 hour", "2 hour", "4 hour", "9 hour"],
                               name="idx"),
                TimedeltaIndex(["2 hour", "4 hour"], name="idx"),
            ),
            # reordered part intersection
            (
                TimedeltaIndex(["2 hour", "5 hour", "5 hour", "1 hour"],
                               name="other"),
                TimedeltaIndex(["1 hour", "2 hour"], name=None),
            ),
            # reversed index
            (
                TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"],
                               name="idx")[::-1],
                TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"],
                               name="idx"),
            ),
        ],
    )
    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection_non_monotonic(self, rng, expected, sort):
        """Intersection of a non-monotonic base with ``rng``."""
        # 24471 non-monotonic
        base = TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"],
                              name="idx")
        common = base.intersection(rng, sort=sort)
        wanted = expected.sort_values() if sort is None else expected
        tm.assert_index_equal(common, wanted)
        assert common.name == wanted.name

        # if reversed order, frequency is still the same
        reversed_and_sorted = all(base == rng[::-1]) and sort is None
        if reversed_and_sorted:
            assert isinstance(common.freq, Hour)
        else:
            assert common.freq is None
Example #15
0
    def test_subtraction_ops_with_tz(self):
        """Subtraction between naive and tz-aware datetime-likes is validated.

        Same-tz operands subtract to Timedelta / TimedeltaIndex results,
        while naive vs. aware or differing-tz operands raise TypeError.
        """

        # check that dt/dti subtraction ops with tz are validated
        naive_dti = date_range('20130101', periods=3)
        naive_ts = Timestamp('20130101')
        naive_dt = naive_ts.to_pydatetime()
        east_dti = date_range('20130101', periods=3).tz_localize('US/Eastern')
        east_ts = Timestamp('20130101').tz_localize('US/Eastern')
        cet_ts = Timestamp('20130101').tz_localize('CET')
        east_dt = east_ts.to_pydatetime()
        one_day = Timedelta('1 days')

        def _assert_timedelta(result, expected):
            assert result == expected
            assert isinstance(result, Timedelta)

        # scalars
        _assert_timedelta(naive_ts - naive_ts, Timedelta('0 days'))
        _assert_timedelta(east_dt - east_ts, Timedelta('0 days'))
        _assert_timedelta(east_ts - east_dt, Timedelta('0 days'))

        # tz mismatches (scalars first, then the index cases)
        mismatched = [
            lambda: east_dt - naive_ts,
            lambda: east_dt - naive_dt,
            lambda: east_dt - cet_ts,
            lambda: naive_dt - east_dt,
            lambda: naive_ts - east_dt,
            lambda: cet_ts - naive_ts,
            lambda: cet_ts - naive_dt,
            lambda: east_ts - cet_ts,
            # with dti
            lambda: naive_dti - east_ts,
            lambda: east_dti - naive_ts,
            lambda: east_dti - cet_ts,
        ]
        for subtract in mismatched:
            with pytest.raises(TypeError):
                subtract()

        forward = TimedeltaIndex(['0 days', '1 days', '2 days'])
        backward = TimedeltaIndex(['0 days', '-1 days', '-2 days'])

        tm.assert_index_equal(east_dti - east_dt, forward)
        tm.assert_index_equal(east_dt - east_dti, backward)
        tm.assert_index_equal(east_dti - east_ts, forward)
        tm.assert_index_equal(east_ts - east_dti, backward)

        _assert_timedelta(one_day - one_day, Timedelta('0 days'))

        shifted = east_dti - one_day
        wanted = DatetimeIndex(
            ['20121231', '20130101', '20130102'], tz='US/Eastern')
        tm.assert_index_equal(shifted, wanted)
Example #16
0
    def test_round(self):
        """Check ``round`` on Timedelta scalars and on a TimedeltaIndex.

        Every valid frequency is applied to a positive and a negative input
        and compared with the expected value; non-fixed or unknown
        frequencies must raise ValueError.
        """

        # Frequencies that cannot be used for rounding, with the expected
        # error message; shared by the scalar and the index checks.
        invalid_freqs = [
            ("Y", "<YearEnd: month=12> is a non-fixed frequency"),
            ("M", "<MonthEnd> is a non-fixed frequency"),
            ("foobar", "Invalid frequency: foobar"),
        ]

        t1 = Timedelta("1 days 02:34:56.789123456")
        t2 = Timedelta("-1 days 02:34:56.789123456")

        for (freq, s1, s2) in [
            ("N", t1, t2),
            (
                "U",
                Timedelta("1 days 02:34:56.789123000"),
                Timedelta("-1 days 02:34:56.789123000"),
            ),
            (
                "L",
                Timedelta("1 days 02:34:56.789000000"),
                Timedelta("-1 days 02:34:56.789000000"),
            ),
            ("S", Timedelta("1 days 02:34:57"), Timedelta("-1 days 02:34:57")),
            ("2S", Timedelta("1 days 02:34:56"), Timedelta("-1 days 02:34:56")),
            ("5S", Timedelta("1 days 02:34:55"), Timedelta("-1 days 02:34:55")),
            ("T", Timedelta("1 days 02:35:00"), Timedelta("-1 days 02:35:00")),
            ("12T", Timedelta("1 days 02:36:00"), Timedelta("-1 days 02:36:00")),
            ("H", Timedelta("1 days 03:00:00"), Timedelta("-1 days 03:00:00")),
            ("d", Timedelta("1 days"), Timedelta("-1 days")),
        ]:
            r1 = t1.round(freq)
            assert r1 == s1
            r2 = t2.round(freq)
            assert r2 == s2

        # invalid
        for freq, msg in invalid_freqs:
            with pytest.raises(ValueError, match=msg):
                t1.round(freq)

        t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us")
        t2 = -1 * t1
        t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s")
        t1c = TimedeltaIndex([1, 1, 1], unit="D")

        # note that negative times round DOWN! so don't give whole numbers
        for (freq, s1, s2) in [
            ("N", t1, t2),
            ("U", t1, t2),
            (
                "L",
                t1a,
                TimedeltaIndex(
                    ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"],
                    dtype="timedelta64[ns]",
                    freq=None,
                ),
            ),
            (
                "S",
                t1a,
                TimedeltaIndex(
                    ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"],
                    dtype="timedelta64[ns]",
                    freq=None,
                ),
            ),
            (
                "12T",
                t1c,
                TimedeltaIndex(
                    ["-1 days", "-1 days", "-1 days"],
                    dtype="timedelta64[ns]",
                    freq=None,
                ),
            ),
            (
                "H",
                t1c,
                TimedeltaIndex(
                    ["-1 days", "-1 days", "-1 days"],
                    dtype="timedelta64[ns]",
                    freq=None,
                ),
            ),
            ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")),
        ]:

            r1 = t1.round(freq)
            tm.assert_index_equal(r1, s1)
            r2 = t2.round(freq)
            tm.assert_index_equal(r2, s2)

        # invalid
        for freq, msg in invalid_freqs:
            with pytest.raises(ValueError, match=msg):
                t1.round(freq)

    def test_strftime_nat(self):
        # GH 29578
        arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]"))

        result = arr.strftime("%Y-%m-%d")
        expected = np.array(["2019-01-01", np.nan], dtype=object)
        tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize(
    "arr,casting_nats",
    [
        (
            TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
            (NaT, np.timedelta64("NaT", "ns")),
        ),
        (
            pd.date_range("2000-01-01", periods=3, freq="D")._data,
            (NaT, np.datetime64("NaT", "ns")),
        ),
        (pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT, )),
    ],
    ids=lambda x: type(x).__name__,
)
def test_casting_nat_setitem_array(arr, casting_nats):
    """Prepare the NaT-in-first-position array for each NaT-like candidate.

    Parameters
    ----------
    arr : the ``_data`` array of a timedelta, datetime or period index
    casting_nats : tuple of NaT-like scalars paired with that array type
    """
    # Same array type as ``arr`` with element 0 replaced by NaT and
    # elements 1 and 2 carried over unchanged.
    expected = type(arr)._from_sequence([NaT, arr[1], arr[2]])

    for nat in casting_nats:
        # Fresh copy per candidate.
        # NOTE(review): nothing is assigned or asserted after the copy in the
        # visible body, so ``nat`` and ``expected`` go unused -- the loop
        # looks truncated; confirm against the upstream test.
        arr = arr.copy()
Example #18
0
@pytest.mark.parametrize(
    "op_name",
    [
        "left_plus_right", "right_plus_left", "left_minus_right",
        "right_minus_left"
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"),
        DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"),
        DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]),
        DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"],
                                     tz="US/Pacific"),
        TimedeltaIndex(["1 day", "2 day"], name="x"),
    ],
)
def test_nat_arithmetic_index(op_name, value):
    """Build the all-NaT container expected for ``op_name`` applied to ``value``.

    Parameters
    ----------
    op_name : str
        One of the four operation labels from the parametrization.
    value : DatetimeIndex, DatetimeArray or TimedeltaIndex
        The array-like operand.
    """
    # see gh-11718
    exp_name = "x"
    exp_data = [NaT] * 2

    # A "plus" operation on a datetime64 operand (naive or tz-aware) expects
    # a DatetimeIndex carrying the operand's tz; every other combination
    # expects a TimedeltaIndex.
    if is_datetime64_any_dtype(value.dtype) and "plus" in op_name:
        expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name)
    else:
        expected = TimedeltaIndex(exp_data, name=exp_name)

    # For non-Index operands the expectation is the underlying array.
    if not isinstance(value, Index):
        expected = expected.array
    # NOTE(review): the operation named by ``op_name`` is never performed and
    # ``expected`` is never compared in the visible body -- this test looks
    # truncated; confirm against the upstream source.
Example #19
0
 def test_range_kwargs_deprecated(self):
     """Range-style constructor keywords emit a FutureWarning (GH#23919)."""
     range_kwargs = {'start': '1 Day', 'end': '3 Days', 'freq': 'D'}
     with tm.assert_produces_warning(FutureWarning):
         TimedeltaIndex(**range_kwargs)
Example #20
0
def diff(arr, n, axis=0):
    """Difference of ``arr`` with itself shifted by ``n`` along ``axis``.

    Analogous to ``s - s.shift(n)``.

    Parameters
    ----------
    arr : ndarray
    n : int
        Number of positions to shift; negative values shift the other way.
    axis : int, default 0
        Axis along which the difference is taken.

    Returns
    -------
    ndarray
        Array of the same shape as ``arr``. Integer input gives float64
        output and boolean input gives object output. Input that needs i8
        conversion is differenced on its int64 view and returned as
        ``timedelta64[ns]``. The ``n`` positions without a lagged partner
        are filled with NaN (iNaT on the i8 path).
    """

    n = int(n)
    fill = np.nan
    out_dtype = arr.dtype
    is_timedelta = False
    if needs_i8_conversion(arr):
        # Operate on the raw int64 view; iNaT marks the missing entries.
        out_dtype = np.float64
        arr = arr.view('i8')
        fill = tslib.iNaT
        is_timedelta = True
    elif issubclass(out_dtype.type, np.integer):
        out_dtype = np.float64
    elif issubclass(out_dtype.type, np.bool_):
        out_dtype = np.object_

    out_arr = np.empty(arr.shape, dtype=np.dtype(out_dtype))

    def _on_axis(part):
        # Indexer that selects everything except ``part`` along ``axis``.
        indexer = [slice(None)] * arr.ndim
        indexer[axis] = part
        return tuple(indexer)

    # ``edge`` has no lagged partner, ``body`` receives the differences.
    if n >= 0:
        edge, body = slice(None, n), slice(n, None)
    else:
        edge, body = slice(n, None), slice(None, n)
    out_arr[_on_axis(edge)] = fill

    if arr.ndim == 2 and arr.dtype.name in _diff_special:
        # Specialised 2-D routine registered for this dtype.
        _diff_special[arr.dtype.name](arr, out_arr, n, axis)
    else:
        res_indexer = _on_axis(body)
        if n > 0:
            lag_indexer = _on_axis(slice(None, -n))
        else:
            lag_indexer = _on_axis(slice(-n, None))

        if not is_timedelta:
            out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
        else:
            # need to make sure that we account for na for datelike/timedelta
            # we don't actually want to subtract these i8 numbers
            current = arr[res_indexer]
            lagged = arr[lag_indexer]

            missing = (current == fill) | (lagged == fill)
            if missing.any():
                # Zero the missing slots on copies so the subtraction is
                # harmless; they are reset to the fill value afterwards.
                current = current.copy()
                current[missing] = 0
                lagged = lagged.copy()
                lagged[missing] = 0

            delta = current - lagged
            delta[missing] = fill
            out_arr[res_indexer] = delta

    if is_timedelta:
        from pandas import TimedeltaIndex
        flat_i8 = out_arr.ravel().astype('int64')
        as_i8 = TimedeltaIndex(flat_i8).asi8.reshape(out_arr.shape)
        out_arr = as_i8.astype('timedelta64[ns]')

    return out_arr
Example #21
0
    if labels is None:
        intervals = IntervalIndex([Interval(start, end)] * length,
                                  closed="right")
        expected = Series(intervals).astype(CDT(ordered=True))
    else:
        expected = Series([0] * length, dtype=np.intp)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "ser",
    [
        Series(DatetimeIndex(["20180101", NaT, "20180103"])),
        Series(TimedeltaIndex(["0 days", NaT, "2 days"])),
    ],
    ids=lambda x: str(x.dtype),
)
def test_qcut_nat(ser):
    """``qcut`` into two bins keeps the NaT entry as a missing interval."""
    # see gh-19768
    first, last = ser[0], ser[2]
    lower_bin = (first - Nano(), last - Day())
    upper_bin = (last - Day(), last)
    intervals = IntervalIndex.from_tuples([lower_bin, np.nan, upper_bin])
    expected = Series(Categorical(intervals, ordered=True))

    tm.assert_series_equal(qcut(ser, 2), expected)


@pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)])
def test_datetime_tz_qcut(bins):
Example #22
0
 def test_shift_no_freq(self):
     """Shifting an index without a frequency raises NullFrequencyError."""
     # GH#19147
     no_freq = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'],
                              freq=None)
     with pytest.raises(NullFrequencyError):
         no_freq.shift(2)
Example #23
0
    def test_dt_namespace_accessor(self):
        """Exercise the ``Series.dt`` accessor for all datetime-like dtypes.

        For datetime (naive and tz-aware), timedelta and period Series this
        checks that each whitelisted property matches the value computed on
        the equivalent Index, that the whitelisted methods are reachable,
        that ``dir(s.dt)`` exposes exactly the whitelisted names, and that
        assigning through the accessor is rejected.
        """

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert',
            'normalize', 'strftime', 'round', 'floor', 'ceil', 'day_name',
            'month_name'
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor',
            'ceil'
        ]

        def get_expected(s, name):
            # Reference value: the attribute ``name`` looked up on an Index
            # built from the Series values, re-wrapped as a Series when it is
            # list-like.
            result = getattr(Index(s._values), name)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype('int64')
            elif not is_list_like(result):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            # Compare ``s.dt.<name>`` with the Index-based reference value.
            a = getattr(s.dt, name)
            b = get_expected(s, name)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range('20130101', periods=5), name='xxx'),
            Series(date_range('20130101', periods=5, freq='s'), name='xxx'),
            Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
                   name='xxx')
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize('US/Eastern')
            exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == 'US/Eastern'
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq='infer').freq

            # let's localize, then convert
            result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
            exp_values = (DatetimeIndex(
                s.values).tz_localize('UTC').tz_convert('US/Eastern'))
            expected = Series(exp_values, index=s.index, name='xxx')
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
                   name='xxx')
        for prop in ok_for_dt:

            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert('CET')
        expected = Series(s._values.tz_convert('CET'),
                          index=s.index,
                          name='xxx')
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == 'CET'
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq='infer').freq

        # timedelta index
        cases = [
            Series(timedelta_range('1 day', periods=5),
                   index=list('abcde'),
                   name='xxx'),
            Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'),
                   name='xxx'),
            Series(timedelta_range('2 days 01:23:45.012345',
                                   periods=5,
                                   freq='ms'),
                   name='xxx')
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == 'float64'

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq='infer').freq

        # both
        index = date_range('20130101', periods=3, freq='D')
        s = Series(date_range('20140204', periods=3, freq='s'),
                   index=index,
                   name='xxx')
        exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype='int64'),
                     index=index,
                     name='xxx')
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name='xxx')
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [
            Series(period_range('20130101', periods=5, freq='D'), name='xxx')
        ]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != 'freq':
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            # Sorted, de-duplicated public names exposed by ``s.dt``.
            results = [r for r in s.dt.__dir__() if not r.startswith('_')]
            return sorted(set(results))

        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_dt + ok_for_dt_methods)))

        s = Series(
            period_range('20130101', periods=5, freq='D',
                         name='xxx').astype(object))
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_period + ok_for_period_methods)))

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'),
                   name='xxx')
        s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
        results = get_dir(s)
        tm.assert_almost_equal(
            results, sorted(set(ok_for_dt + ok_for_dt_methods)))
        exp_values = pd.date_range('2015-01-01',
                                   '2016-01-01',
                                   freq='T',
                                   tz='UTC').tz_convert('America/Chicago')
        expected = Series(exp_values, name='xxx')
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context('chained_assignment', 'raise'):
            with pytest.raises(com.SettingWithCopyError):
                s.dt.hour[0] = 5
Example #24
0
 def test_tdi_sub_timestamp_raises(self):
     """Subtracting a Timestamp from a TimedeltaIndex raises TypeError."""
     tdi = TimedeltaIndex(['1 day', '2 day'])
     with tm.assert_raises_regex(
             TypeError, "cannot subtract a datelike from a TimedeltaIndex"):
         tdi - Timestamp('2011-01-01')
Example #25
0
 def test_unit_m_y_raises(self, unit):
     """Constructing with the given ``unit`` raises ValueError."""
     with pytest.raises(
             ValueError,
             match="Units 'M', 'Y', and 'y' are no longer supported"):
         TimedeltaIndex([1, 3, 7], unit)
Example #26
0
    def test_tdi_radd_timestamp(self):
        idx = TimedeltaIndex(['1 day', '2 day'])

        result = Timestamp('2011-01-01') + idx
        expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
        tm.assert_index_equal(result, expected)
Example #27
0
 def test_float64_unit_conversion(self):
     # GH#23539
     tdi = TimedeltaIndex([1.5, 2.25], unit="D")
     expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)])
     tm.assert_index_equal(tdi, expected)
Example #28
0
 def test_tdi_div_tdlike_scalar_with_nat(self, delta):
     """Dividing by ``delta`` gives floats, with NaN where the input is NaT."""
     tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo')
     quotient = tdi / delta
     wanted = Float64Index([12, np.nan, 24], name='foo')
     tm.assert_index_equal(quotient, wanted)
Example #29
0
    def test_to_timedelta(self):
        """Check ``to_timedelta`` on strings, scalars, Series and arrays.

        Covers unboxed (``box=False``) results, empty strings, pass-through
        of ``timedelta64`` data, the ``unit`` argument for integer and float
        input, and arrays of several dtypes.
        """

        def as_ns(value):
            # bring a numpy timedelta to nanosecond resolution
            return value.astype('m8[ns]')

        one_day = np.timedelta64(1, 'D')

        # string parsing, compared against hand-built numpy timedeltas
        parsed = to_timedelta('1 days 06:05:01.00003', box=False)
        clock_part = np.timedelta64(6 * 3600 + 5 * 60 + 1, 's')
        assert parsed == as_ns(one_day + clock_part +
                               np.timedelta64(30, 'us'))

        parsed = to_timedelta('15.5us', box=False)
        assert parsed == as_ns(np.timedelta64(15500, 'ns'))

        # an empty string compares equal to iNaT once viewed as int64
        blank = to_timedelta('', box=False)
        assert blank.astype('int64') == iNaT

        # a list of empty strings is null throughout
        assert isna(to_timedelta(['', ''])).all()

        # timedelta64 arrays pass through
        passthrough = to_timedelta(np.array([np.timedelta64(1, 's')]))
        tm.assert_index_equal(
            passthrough, pd.Index(np.array([np.timedelta64(1, 's')])))

        # integer scalar
        assert np.timedelta64(0, 'ns') == to_timedelta(0, box=False)

        # Series of strings
        wanted = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
        tm.assert_series_equal(
            to_timedelta(Series(['1d', '1days 00:00:01'])), wanted)

        # explicit unit on a list of ints
        manual = TimedeltaIndex([
            np.timedelta64(0, 'ns'),
            np.timedelta64(10, 's').astype('m8[ns]'),
        ])
        tm.assert_index_equal(manual, to_timedelta([0, 10], unit='s'))

        # single timedelta-like scalars: stdlib first, then numpy
        for scalar in (timedelta(seconds=1),
                       np.timedelta64(timedelta(seconds=1))):
            unboxed = to_timedelta(scalar, box=False)
            assert unboxed == np.timedelta64(timedelta(seconds=1))

        # int64 arrays interpreted in different units
        for unit in ('s', 'm', 'h'):
            ints = np.array([1] * 5, dtype='int64')
            tm.assert_index_equal(
                to_timedelta(ints, unit=unit),
                TimedeltaIndex([np.timedelta64(1, unit)] * 5))

        # arrays that already carry a timedelta64 dtype
        for unit, dtype in (('s', 'timedelta64[s]'), ('D', 'timedelta64[D]')):
            typed = np.array([1] * 5, dtype=dtype)
            tm.assert_index_equal(
                to_timedelta(typed),
                TimedeltaIndex([np.timedelta64(1, unit)] * 5))

        # list-likes of ints with box=False come back as an ndarray
        whole_seconds = np.array(np.arange(3) * 1000000000,
                                 dtype='timedelta64[ns]')
        for values in (range(3), np.arange(3), [0, 1, 2]):
            tm.assert_numpy_array_equal(
                whole_seconds, to_timedelta(values, unit='s', box=False))

        # fractional seconds
        fractional = np.array(
            [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]')
        tm.assert_numpy_array_equal(
            fractional,
            to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False))
Example #30
0
 def test_insert_nat(self, null):
     # GH 18295 (test missing)
     idx = timedelta_range("1day", "3day")
     result = idx.insert(1, null)
     expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"])
     tm.assert_index_equal(result, expected)