Example #1
    def test_ambiguous_infer(self):
        # November 6, 2011, fall back, repeat 2 AM hour
        # With no repeated hours, we cannot infer the transition
        tz = self.tz('US/Eastern')
        dr = date_range(datetime(2011, 11, 6, 0), periods=5,
                        freq=datetools.Hour())
        self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz)

        # With repeated hours, we can infer the transition
        dr = date_range(datetime(2011, 11, 6, 0), periods=5,
                        freq=datetools.Hour(), tz=tz)
        times = ['11/06/2011 00:00', '11/06/2011 01:00',
                 '11/06/2011 01:00', '11/06/2011 02:00',
                 '11/06/2011 03:00']
        di = DatetimeIndex(times)
        localized = di.tz_localize(tz, ambiguous='infer')
        self.assert_numpy_array_equal(dr, localized)
        with tm.assert_produces_warning(FutureWarning):
            localized_old = di.tz_localize(tz, infer_dst=True)
        self.assert_numpy_array_equal(dr, localized_old)
        self.assert_numpy_array_equal(dr, DatetimeIndex(times, tz=tz, ambiguous='infer'))

        # When there is no dst transition, nothing special happens
        dr = date_range(datetime(2011, 6, 1, 0), periods=10,
                        freq=datetools.Hour())
        localized = dr.tz_localize(tz)
        localized_infer = dr.tz_localize(tz, ambiguous='infer')
        self.assert_numpy_array_equal(localized, localized_infer)
        with tm.assert_produces_warning(FutureWarning):
            localized_infer_old = dr.tz_localize(tz, infer_dst=True)
        self.assert_numpy_array_equal(localized, localized_infer_old)
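The ambiguous='infer' option exercised above can be seen directly on a small index. A minimal sketch (assumes US/Eastern DST rules; the offsets in the comments are what pandas assigns, treating the first repeated 01:00 as the DST occurrence and the second as standard time):

import pandas as pd

# Fall back on 2011-11-06 in US/Eastern: the 01:00 wall time occurs twice.
naive = pd.DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00',
                          '11/06/2011 01:00', '11/06/2011 02:00'])

# 'infer' uses the monotonic ordering to pick the DST flag for each repeat:
# the first 01:00 becomes 01:00-04:00 (EDT), the second 01:00-05:00 (EST).
localized = naive.tz_localize('US/Eastern', ambiguous='infer')
print(localized)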
Example #2
 def test_reasonable_keyerror(self):
     # GH #1062
     index = DatetimeIndex(["1/3/2000"])
     try:
         index.get_loc("1/1/2000")
     except KeyError as e:
         self.assert_("2000" in str(e))
Example #3
 def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type):
     # GH 8917
     tz = tz_type + 'Europe/Warsaw'
     dti = DatetimeIndex([Timestamp('2015-03-29 02:20:00')])
     msg = "The provided timedelta will relocalize on a nonexistent time"
     with pytest.raises(ValueError, match=msg):
         dti.tz_localize(tz, nonexistent=timedelta(seconds=offset))
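For contrast with the invalid timedelta above, a minimal sketch of the documented nonexistent= options on tz_localize (available since pandas 0.24), using the same spring-forward instant in Europe/Warsaw:

import pandas as pd
from datetime import timedelta

# 02:20 on 2015-03-29 does not exist in Europe/Warsaw (clocks jump 02:00 -> 03:00).
dti = pd.DatetimeIndex([pd.Timestamp('2015-03-29 02:20:00')])

dti.tz_localize('Europe/Warsaw', nonexistent='shift_forward')     # -> 03:00:00+02:00
dti.tz_localize('Europe/Warsaw', nonexistent='shift_backward')    # -> 01:59:59.999999999+01:00
dti.tz_localize('Europe/Warsaw', nonexistent='NaT')               # -> NaT
dti.tz_localize('Europe/Warsaw', nonexistent=timedelta(hours=1))  # shifted to 03:20:00+02:00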
Example #4
def construct_1d_arraylike_from_scalar(value, length, dtype):
    """
    Create a np.ndarray / pandas type of the specified length and dtype,
    filled with value.

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype / np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value

    """
    if is_datetimetz(dtype):
        from pandas import DatetimeIndex
        subarr = DatetimeIndex([value] * length, dtype=dtype)
    elif is_categorical_dtype(dtype):
        from pandas import Categorical
        subarr = Categorical([value] * length)
    else:
        if not isinstance(dtype, (np.dtype, type(np.dtype))):
            dtype = dtype.dtype

        # coerce if we have nan for an integer dtype
        if is_integer_dtype(dtype) and isna(value):
            dtype = np.float64
        subarr = np.empty(length, dtype=dtype)
        subarr.fill(value)

    return subarr
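The integer-with-NaN branch above is the subtle part: NumPy integer arrays cannot hold NaN, so the helper silently upcasts to float64 before filling. A standalone sketch of just that branch, using only NumPy (the real helper is pandas-internal and its location and signature have changed across versions; broadcast_scalar is a hypothetical name):

import numpy as np

def broadcast_scalar(value, length, dtype):
    # Simplified sketch: build an ndarray of `length` filled with `value`.
    dtype = np.dtype(dtype)
    # Integer dtypes cannot represent NaN, so coerce to float64 first.
    if np.issubdtype(dtype, np.integer) and isinstance(value, float) and np.isnan(value):
        dtype = np.dtype(np.float64)
    out = np.empty(length, dtype=dtype)
    out.fill(value)
    return out

broadcast_scalar(7, 3, np.int64)       # array([7, 7, 7])
broadcast_scalar(np.nan, 3, np.int64)  # array([nan, nan, nan])  (upcast to float64)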
Example #5
    def test_dti_tz_localize_ambiguous_infer(self, tz):
        # November 6, 2011, fall back, repeat 2 AM hour
        # With no repeated hours, we cannot infer the transition
        dr = date_range(datetime(2011, 11, 6, 0), periods=5,
                        freq=pd.offsets.Hour())
        with pytest.raises(pytz.AmbiguousTimeError):
            dr.tz_localize(tz)

        # With repeated hours, we can infer the transition
        dr = date_range(datetime(2011, 11, 6, 0), periods=5,
                        freq=pd.offsets.Hour(), tz=tz)
        times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00',
                 '11/06/2011 02:00', '11/06/2011 03:00']
        di = DatetimeIndex(times)
        localized = di.tz_localize(tz, ambiguous='infer')
        tm.assert_index_equal(dr, localized)
        tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz,
                                                ambiguous='infer'))

        # When there is no dst transition, nothing special happens
        dr = date_range(datetime(2011, 6, 1, 0), periods=10,
                        freq=pd.offsets.Hour())
        localized = dr.tz_localize(tz)
        localized_infer = dr.tz_localize(tz, ambiguous='infer')
        tm.assert_index_equal(localized, localized_infer)
Example #6
    def test_tz_localize_dti(self):
        from pandas.tseries.offsets import Hour

        dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256',
                            freq='L')
        dti2 = dti.tz_localize('US/Eastern')

        dti_utc = DatetimeIndex(start='1/1/2005 05:00',
                                end='1/1/2005 5:00:30.256', freq='L',
                                tz='utc')

        self.assert_(np.array_equal(dti2.values, dti_utc.values))

        dti3 = dti2.tz_convert('US/Pacific')
        self.assert_(np.array_equal(dti3.values, dti_utc.values))

        dti = DatetimeIndex(start='11/6/2011 1:59',
                            end='11/6/2011 2:00', freq='L')
        self.assertRaises(pytz.AmbiguousTimeError, dti.tz_localize,
                          'US/Eastern')

        dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00',
                            freq='L')
        self.assertRaises(pytz.AmbiguousTimeError, dti.tz_localize,
                          'US/Eastern')
Example #7
    def test_round_int64(self, start, index_freq, periods, round_freq):
        dt = DatetimeIndex(start=start, freq=index_freq, periods=periods)
        unit = to_offset(round_freq).nanos

        # test floor
        result = dt.floor(round_freq)
        diff = dt.asi8 - result.asi8
        mod = result.asi8 % unit
        assert (mod == 0).all(), "floor not a {} multiple".format(round_freq)
        assert (0 <= diff).all() and (diff < unit).all(), "floor error"

        # test ceil
        result = dt.ceil(round_freq)
        diff = result.asi8 - dt.asi8
        mod = result.asi8 % unit
        assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq)
        assert (0 <= diff).all() and (diff < unit).all(), "ceil error"

        # test round
        result = dt.round(round_freq)
        diff = abs(result.asi8 - dt.asi8)
        mod = result.asi8 % unit
        assert (mod == 0).all(), "round not a {} multiple".format(round_freq)
        assert (diff <= unit // 2).all(), "round error"
        if unit % 2 == 0:
            assert (
                result.asi8[diff == unit // 2] % 2 == 0
            ).all(), "round half to even error"
Example #8
    def test_get_duplicates(self):
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
                             '2000-01-03', '2000-01-03', '2000-01-04'])

        result = idx.get_duplicates()
        ex = DatetimeIndex(['2000-01-02', '2000-01-03'])
        tm.assert_index_equal(result, ex)
Example #9
 def test_reasonable_keyerror(self):
     # GH #1062
     index = DatetimeIndex(['1/3/2000'])
     try:
         index.get_loc('1/1/2000')
     except KeyError as e:
         assert '2000' in str(e)
Example #10
    def test_map_bug_1677(self):
        index = DatetimeIndex(['2012-04-25 09:30:00.393000'])
        f = index.asof

        result = index.map(f)
        expected = Index([f(index[0])])
        tm.assert_index_equal(result, expected)
Example #11
    def test_order_without_freq(self, index_dates, expected_dates, tz_fixture):
        tz = tz_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name='idx')
        expected = DatetimeIndex(expected_dates, tz=tz, name='idx')

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None
Example #12
    def test_isnull_datetime(self):
        self.assertFalse(isnull(datetime.now()))
        self.assertTrue(notnull(datetime.now()))

        idx = date_range('1/1/1990', periods=20)
        exp = np.ones(len(idx), dtype=bool)
        tm.assert_numpy_array_equal(notnull(idx), exp)

        idx = np.asarray(idx)
        idx[0] = iNaT
        idx = DatetimeIndex(idx)
        mask = isnull(idx)
        self.assertTrue(mask[0])
        exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)

        # GH 9129
        pidx = idx.to_period(freq='M')
        mask = isnull(pidx)
        self.assertTrue(mask[0])
        exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)

        mask = isnull(pidx[1:])
        exp = np.zeros(len(mask), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)
Example #13
    def test_astype_str_compat(self):
        # GH 13149, GH 13209
        # verify that we are returning NaT as a string (and not unicode)

        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
        result = idx.astype(str)
        expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object)
        tm.assert_index_equal(result, expected)
Example #14
    def test_series_tz_convert_to_utc(self):
        base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                             tz='UTC')
        idx1 = base.tz_convert('Asia/Tokyo')[:2]
        idx2 = base.tz_convert('US/Eastern')[1:]

        res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
        tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))
Example #15
    def test_index_conversion(self):
        index = self.frame.index
        obj_index = index.asobject

        conv = DatetimeIndex(obj_index)
        self.assert_(conv.equals(index))

        self.assertRaises(ValueError, DatetimeIndex, ["a", "b", "c", "d"])
Example #16
    def test_dti_tz_convert_compat_timestamp(self, prefix):
        strdates = ['1/1/2012', '3/1/2012', '4/1/2012']
        idx = DatetimeIndex(strdates, tz=prefix + 'US/Eastern')

        conv = idx[0].tz_convert(prefix + 'US/Pacific')
        expected = idx.tz_convert(prefix + 'US/Pacific')[0]

        assert conv == expected
Example #17
    def test_timestamp_tz_convert(self):
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
        idx = DatetimeIndex(strdates, tz=self.tzstr("US/Eastern"))

        conv = idx[0].tz_convert(self.tzstr("US/Pacific"))
        expected = idx.tz_convert(self.tzstr("US/Pacific"))[0]

        self.assertEqual(conv, expected)
Example #18
    def test_timestamp_tz_convert(self):
        strdates = ['1/1/2012', '3/1/2012', '4/1/2012']
        idx = DatetimeIndex(strdates, tz='US/Eastern')

        conv = idx[0].tz_convert('US/Pacific')
        expected = idx.tz_convert('US/Pacific')[0]

        self.assertEqual(conv, expected)
Example #19
    def test_index_conversion(self):
        index = self.frame.index
        obj_index = index.asobject

        conv = DatetimeIndex(obj_index)
        self.assertTrue(conv.equals(index))

        self.assertRaises(ValueError, DatetimeIndex, ['a', 'b', 'c', 'd'])
Example #20
    def test_union_coverage(self, sort):
        idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
        ordered = DatetimeIndex(idx.sort_values(), freq='infer')
        result = ordered.union(idx, sort=sort)
        tm.assert_index_equal(result, ordered)

        result = ordered[:0].union(ordered, sort=sort)
        tm.assert_index_equal(result, ordered)
        assert result.freq == ordered.freq
Example #21
    def test_ambiguous_nat(self):
        tz = self.tz("US/Eastern")
        times = ["11/06/2011 00:00", "11/06/2011 01:00", "11/06/2011 01:00", "11/06/2011 02:00", "11/06/2011 03:00"]
        di = DatetimeIndex(times)
        localized = di.tz_localize(tz, ambiguous="NaT")

        times = ["11/06/2011 00:00", np.NaN, np.NaN, "11/06/2011 02:00", "11/06/2011 03:00"]
        di_test = DatetimeIndex(times, tz="US/Eastern")
        self.assert_numpy_array_equal(di_test, localized)
Example #22
    def test_union_bug_1745(self):
        left = DatetimeIndex(['2012-05-11 15:19:49.695000'])
        right = DatetimeIndex(['2012-05-29 13:04:21.322000',
                               '2012-05-11 15:27:24.873000',
                               '2012-05-11 15:31:05.350000'])

        result = left.union(right)
        exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
        tm.assert_index_equal(result, exp)
Example #23
    def test_get_duplicates(self):
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
                             '2000-01-03', '2000-01-03', '2000-01-04'])

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            result = idx.get_duplicates()

        ex = DatetimeIndex(['2000-01-02', '2000-01-03'])
        tm.assert_index_equal(result, ex)
Example #24
    def test_datetimeindex_union_join_empty(self):
        dti = DatetimeIndex(start="1/1/2001", end="2/1/2001", freq="D")
        empty = Index([])

        result = dti.union(empty)
        self.assert_(isinstance(result, DatetimeIndex))
        self.assert_(result is result)

        result = dti.join(empty)
        self.assert_(isinstance(result, DatetimeIndex))
Example #25
    def test_pass_dates_localize_to_utc(self):
        strdates = ['1/1/2012', '3/1/2012', '4/1/2012']

        idx = DatetimeIndex(strdates)
        conv = idx.tz_localize('US/Eastern')

        fromdates = DatetimeIndex(strdates, tz='US/Eastern')

        self.assert_(conv.tz == fromdates.tz)
        self.assert_(np.array_equal(conv.values, fromdates.values))
Example #26
    def test_to_timestamp_to_period_astype(self):
        idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx')

        res = idx.astype('period[M]')
        exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx')
        tm.assert_index_equal(res, exp)

        res = idx.astype('period[3M]')
        exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx')
        tm.assert_index_equal(res, exp)
Example #27
    def test_misc_coverage(self):
        rng = date_range('1/1/2000', periods=5)
        result = rng.groupby(rng.day)
        assert isinstance(list(result.values())[0][0], Timestamp)

        idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
        assert not idx.equals(list(idx))

        non_datetime = Index(list('abc'))
        assert not idx.equals(list(non_datetime))
Example #28
 def test_astype_object_with_nat(self):
     idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                          pd.NaT, datetime(2013, 1, 4)], name='idx')
     expected_list = [Timestamp('2013-01-01'),
                      Timestamp('2013-01-02'), pd.NaT,
                      Timestamp('2013-01-04')]
     expected = pd.Index(expected_list, dtype=object, name='idx')
     result = idx.astype(object)
     tm.assert_index_equal(result, expected)
     assert idx.tolist() == expected_list
Example #29
    def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
        strdates = ['1/1/2012', '3/1/2012', '4/1/2012']

        idx = DatetimeIndex(strdates)
        conv = idx.tz_localize(tzstr)

        fromdates = DatetimeIndex(strdates, tz=tzstr)

        assert conv.tz == fromdates.tz
        tm.assert_numpy_array_equal(conv.values, fromdates.values)
Example #30
    def test_pass_dates_localize_to_utc(self):
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]

        idx = DatetimeIndex(strdates)
        conv = idx.tz_localize(self.tzstr("US/Eastern"))

        fromdates = DatetimeIndex(strdates, tz=self.tzstr("US/Eastern"))

        self.assertEqual(conv.tz, fromdates.tz)
        self.assert_numpy_array_equal(conv.values, fromdates.values)
Example #31
    def test_dti_from_tzaware_datetime(self, tz):
        d = [datetime(2012, 8, 19, tzinfo=tz)]

        index = DatetimeIndex(d)
        assert timezones.tz_compare(index.tz, tz)
Example #32
 def test_dti_tz_conversion_freq(self, tz_naive_fixture):
     # GH25241
     t3 = DatetimeIndex(["2019-01-01 10:00"], freq="H")
     assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq
     t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="T")
     assert t4.tz_convert(tz="UTC").freq == t4.freq
Example #33
 def test_cdaterange(self):
     rng = cdate_range('2013-05-01', periods=3)
     xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03'])
     tm.assert_index_equal(xp, rng)
Example #34
    def test_datetimeindex_accessors(self):
        dti_naive = pd.date_range(freq='D',
                                  start=datetime(1998, 1, 1),
                                  periods=365)
        # GH#13303
        dti_tz = pd.date_range(freq='D',
                               start=datetime(1998, 1, 1),
                               periods=365,
                               tz='US/Eastern')
        for dti in [dti_naive, dti_tz]:

            assert dti.year[0] == 1998
            assert dti.month[0] == 1
            assert dti.day[0] == 1
            assert dti.hour[0] == 0
            assert dti.minute[0] == 0
            assert dti.second[0] == 0
            assert dti.microsecond[0] == 0
            assert dti.dayofweek[0] == 3

            assert dti.dayofyear[0] == 1
            assert dti.dayofyear[120] == 121

            assert dti.weekofyear[0] == 1
            assert dti.weekofyear[120] == 18

            assert dti.quarter[0] == 1
            assert dti.quarter[120] == 2

            assert dti.days_in_month[0] == 31
            assert dti.days_in_month[90] == 30

            assert dti.is_month_start[0]
            assert not dti.is_month_start[1]
            assert dti.is_month_start[31]
            assert dti.is_quarter_start[0]
            assert dti.is_quarter_start[90]
            assert dti.is_year_start[0]
            assert not dti.is_year_start[364]
            assert not dti.is_month_end[0]
            assert dti.is_month_end[30]
            assert not dti.is_month_end[31]
            assert dti.is_month_end[364]
            assert not dti.is_quarter_end[0]
            assert not dti.is_quarter_end[30]
            assert dti.is_quarter_end[89]
            assert dti.is_quarter_end[364]
            assert not dti.is_year_end[0]
            assert dti.is_year_end[364]

            assert len(dti.year) == 365
            assert len(dti.month) == 365
            assert len(dti.day) == 365
            assert len(dti.hour) == 365
            assert len(dti.minute) == 365
            assert len(dti.second) == 365
            assert len(dti.microsecond) == 365
            assert len(dti.dayofweek) == 365
            assert len(dti.dayofyear) == 365
            assert len(dti.weekofyear) == 365
            assert len(dti.quarter) == 365
            assert len(dti.is_month_start) == 365
            assert len(dti.is_month_end) == 365
            assert len(dti.is_quarter_start) == 365
            assert len(dti.is_quarter_end) == 365
            assert len(dti.is_year_start) == 365
            assert len(dti.is_year_end) == 365
            assert len(dti.weekday_name) == 365

            dti.name = 'name'

            # non boolean accessors -> return Index
            for accessor in DatetimeIndex._field_ops:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, Index)
                assert res.name == 'name'

            # boolean accessors -> return array
            for accessor in DatetimeIndex._bool_ops:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, np.ndarray)

            # test boolean indexing
            res = dti[dti.is_quarter_start]
            exp = dti[[0, 90, 181, 273]]
            tm.assert_index_equal(res, exp)
            res = dti[dti.is_leap_year]
            exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name')
            tm.assert_index_equal(res, exp)

        dti = pd.date_range(freq='BQ-FEB',
                            start=datetime(1998, 1, 1),
                            periods=4)

        assert sum(dti.is_quarter_start) == 0
        assert sum(dti.is_quarter_end) == 4
        assert sum(dti.is_year_start) == 0
        assert sum(dti.is_year_end) == 1

        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay
        bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        msg = "Custom business days is not supported by is_month_start"
        with pytest.raises(ValueError, match=msg):
            dti.is_month_start

        dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])

        assert dti.is_month_start[0] == 1

        tests = [(Timestamp('2013-06-01', freq='M').is_month_start, 1),
                 (Timestamp('2013-06-01', freq='BM').is_month_start, 0),
                 (Timestamp('2013-06-03', freq='M').is_month_start, 0),
                 (Timestamp('2013-06-03', freq='BM').is_month_start, 1),
                 (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1),
                 (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1),
                 (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1),
                 (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1),
                 (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1),
                 (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1),
                 (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1),
                 (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0),
                 (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0),
                 (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1),
                 (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1),
                 (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1),
                 (Timestamp('2013-06-30', freq='BQ').is_month_end, 0),
                 (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0),
                 (Timestamp('2013-06-30', freq='BQ').is_year_end, 0),
                 (Timestamp('2013-06-28', freq='BQ').is_month_end, 1),
                 (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1),
                 (Timestamp('2013-06-28', freq='BQ').is_year_end, 0),
                 (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0),
                 (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0),
                 (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0),
                 (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1),
                 (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1),
                 (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1),
                 (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1),
                 (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1),
                 (Timestamp('2012-02-01').days_in_month, 29),
                 (Timestamp('2013-02-01').days_in_month, 28)]

        for ts, value in tests:
            assert ts == value

        # GH 6538: Check that DatetimeIndex and its TimeStamp elements
        # return the same weekofyear accessor close to new year w/ tz
        dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
        dates = DatetimeIndex(dates, tz="Europe/Brussels")
        expected = [52, 1, 1]
        assert dates.weekofyear.tolist() == expected
        assert [d.weekofyear for d in dates] == expected
Example #35
    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # GH 11488
        result = Index(
            [pd.NaT,
             Timestamp('2011-01-01'), pd.NaT,
             Timestamp('2011-01-02')],
            name='idx')
        exp = DatetimeIndex(
            [pd.NaT,
             Timestamp('2011-01-01'), pd.NaT,
             Timestamp('2011-01-02')],
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNone(result.tz)

        # same tz results in DatetimeIndex
        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
        ],
                       name='idx')
        exp = DatetimeIndex([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00')
        ],
                            tz='Asia/Tokyo',
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNotNone(result.tz)
        self.assertEqual(result.tz, exp.tz)

        # same tz results in DatetimeIndex (DST)
        result = Index([
            Timestamp('2011-01-01 10:00', tz='US/Eastern'), pd.NaT,
            Timestamp('2011-08-01 10:00', tz='US/Eastern')
        ],
                       name='idx')
        exp = DatetimeIndex([
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-08-01 10:00')
        ],
                            tz='US/Eastern',
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNotNone(result.tz)
        self.assertEqual(result.tz, exp.tz)

        # different tz results in Index(dtype=object)
        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                       name='idx')
        exp = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                    dtype='object',
                    name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertFalse(isinstance(result, DatetimeIndex))

        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                       name='idx')
        exp = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                    dtype='object',
                    name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertFalse(isinstance(result, DatetimeIndex))

        # all NaT
        result = Index([pd.NaT, pd.NaT], name='idx')
        exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNone(result.tz)

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNotNone(result.tz)
        self.assertEqual(result.tz, exp.tz)
Example #36
def test_array_equivalent():
    assert array_equivalent(np.array([np.nan, np.nan]),
                            np.array([np.nan, np.nan]))
    assert array_equivalent(np.array([np.nan, 1, np.nan]),
                            np.array([np.nan, 1, np.nan]))
    assert array_equivalent(
        np.array([np.nan, None], dtype="object"),
        np.array([np.nan, None], dtype="object"),
    )
    # Check the handling of nested arrays in array_equivalent_object
    assert array_equivalent(
        np.array([np.array([np.nan, None], dtype="object"), None],
                 dtype="object"),
        np.array([np.array([np.nan, None], dtype="object"), None],
                 dtype="object"),
    )
    assert array_equivalent(
        np.array([np.nan, 1 + 1j], dtype="complex"),
        np.array([np.nan, 1 + 1j], dtype="complex"),
    )
    assert not array_equivalent(
        np.array([np.nan, 1 + 1j], dtype="complex"),
        np.array([np.nan, 1 + 2j], dtype="complex"),
    )
    assert not array_equivalent(np.array([np.nan, 1, np.nan]),
                                np.array([np.nan, 2, np.nan]))
    assert not array_equivalent(np.array(["a", "b", "c", "d"]),
                                np.array(["e", "e"]))
    assert array_equivalent(Float64Index([0, np.nan]),
                            Float64Index([0, np.nan]))
    assert not array_equivalent(Float64Index([0, np.nan]),
                                Float64Index([1, np.nan]))
    assert array_equivalent(DatetimeIndex([0, np.nan]),
                            DatetimeIndex([0, np.nan]))
    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                DatetimeIndex([1, np.nan]))
    assert array_equivalent(TimedeltaIndex([0, np.nan]),
                            TimedeltaIndex([0, np.nan]))
    assert not array_equivalent(TimedeltaIndex([0, np.nan]),
                                TimedeltaIndex([1, np.nan]))
    assert array_equivalent(
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
    )
    assert not array_equivalent(
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
        DatetimeIndex([1, np.nan], tz="US/Eastern"),
    )
    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                DatetimeIndex([0, np.nan], tz="US/Eastern"))
    assert not array_equivalent(
        DatetimeIndex([0, np.nan], tz="CET"),
        DatetimeIndex([0, np.nan], tz="US/Eastern"),
    )

    assert not array_equivalent(DatetimeIndex([0, np.nan]),
                                TimedeltaIndex([0, np.nan]))
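The point of array_equivalent is that NaN (and NaT) in matching positions compare as equal, which element-wise == and np.array_equal do not give you by default. A minimal sketch, assuming the long-standing but non-public import path pandas.core.dtypes.missing:

import numpy as np
from pandas.core.dtypes.missing import array_equivalent

left = np.array([1.0, np.nan, 3.0])
right = np.array([1.0, np.nan, 3.0])

np.array_equal(left, right)    # False: nan != nan element-wise
array_equivalent(left, right)  # True: NaNs in the same positions count as equal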
Example #37
 def test_dti_constructor_static_tzinfo(self, prefix):
     # it works!
     index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST")
     index.hour
     index[0]
Example #38
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    Note: an array of Periods will ignore ``sort``, as it always returns a sorted
    PeriodIndex.
    """
    from pandas import Index, Series, DatetimeIndex, PeriodIndex

    # handling two possibilities here
    # - for a numpy datetimelike simply view as i8 then cast back
    # - for an extension datetimelike view as i8 then
    #   reconstruct from boxed values to transfer metadata
    dtype = None
    if needs_i8_conversion(values):
        if is_period_dtype(values):
            values = PeriodIndex(values)
            vals = values.asi8
        elif is_datetimetz(values):
            values = DatetimeIndex(values)
            vals = values.asi8
        else:
            # numpy dtype
            dtype = values.dtype
            vals = values.view(np.int64)
    else:
        vals = np.asarray(values)

    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(size_hint or len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

    labels = _ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        uniques, labels = safe_sort(uniques,
                                    labels,
                                    na_sentinel=na_sentinel,
                                    assume_unique=True)

    if dtype is not None:
        uniques = uniques.astype(dtype)

    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)
    return labels, uniques
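For comparison with the internal routine above, the public entry point pd.factorize behaves as shown in this short sketch (newer pandas calls the first return value "codes" rather than "labels"):

import pandas as pd

values = ['b', 'b', 'a', 'c', 'b']

codes, uniques = pd.factorize(values)
# codes   -> array([0, 0, 1, 2, 0])   labels in first-appearance order
# uniques -> array(['b', 'a', 'c'], dtype=object)

codes, uniques = pd.factorize(values, sort=True)
# codes   -> array([1, 1, 0, 2, 1])
# uniques -> array(['a', 'b', 'c'], dtype=object)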
Example #39
def test_datetimeindex_from_empty_datetime64_array():
    for unit in ['ms', 'us', 'ns']:
        idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
        assert (len(idx) == 0)
Example #40
 def test_infer_freq(self, freq_sample):
     # GH 11018
     idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
     result = DatetimeIndex(idx.asi8, freq="infer")
     tm.assert_index_equal(idx, result)
     assert result.freq == freq_sample
Example #41
    def test_range_edges(self):
        # GH#13672
        idx = pd.date_range(start=Timestamp('1970-01-01 00:00:00.000000001'),
                            end=Timestamp('1970-01-01 00:00:00.000000004'),
                            freq='N')
        exp = DatetimeIndex([
            '1970-01-01 00:00:00.000000001', '1970-01-01 00:00:00.000000002',
            '1970-01-01 00:00:00.000000003', '1970-01-01 00:00:00.000000004'
        ])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 00:00:00.000000004'),
                            end=Timestamp('1970-01-01 00:00:00.000000001'),
                            freq='N')
        exp = DatetimeIndex([])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 00:00:00.000000001'),
                            end=Timestamp('1970-01-01 00:00:00.000000001'),
                            freq='N')
        exp = DatetimeIndex(['1970-01-01 00:00:00.000000001'])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 00:00:00.000001'),
                            end=Timestamp('1970-01-01 00:00:00.000004'),
                            freq='U')
        exp = DatetimeIndex([
            '1970-01-01 00:00:00.000001', '1970-01-01 00:00:00.000002',
            '1970-01-01 00:00:00.000003', '1970-01-01 00:00:00.000004'
        ])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 00:00:00.001'),
                            end=Timestamp('1970-01-01 00:00:00.004'),
                            freq='L')
        exp = DatetimeIndex([
            '1970-01-01 00:00:00.001', '1970-01-01 00:00:00.002',
            '1970-01-01 00:00:00.003', '1970-01-01 00:00:00.004'
        ])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 00:00:01'),
                            end=Timestamp('1970-01-01 00:00:04'),
                            freq='S')
        exp = DatetimeIndex([
            '1970-01-01 00:00:01', '1970-01-01 00:00:02',
            '1970-01-01 00:00:03', '1970-01-01 00:00:04'
        ])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 00:01'),
                            end=Timestamp('1970-01-01 00:04'),
                            freq='T')
        exp = DatetimeIndex([
            '1970-01-01 00:01', '1970-01-01 00:02', '1970-01-01 00:03',
            '1970-01-01 00:04'
        ])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01 01:00'),
                            end=Timestamp('1970-01-01 04:00'),
                            freq='H')
        exp = DatetimeIndex([
            '1970-01-01 01:00', '1970-01-01 02:00', '1970-01-01 03:00',
            '1970-01-01 04:00'
        ])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(start=Timestamp('1970-01-01'),
                            end=Timestamp('1970-01-04'),
                            freq='D')
        exp = DatetimeIndex(
            ['1970-01-01', '1970-01-02', '1970-01-03', '1970-01-04'])
        tm.assert_index_equal(idx, exp)
Example #42
    def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz):
        # Regression test for GH#13306

        # sorted case US/Eastern -> UTC
        ts = [
            Timestamp("2008-05-12 09:50:00", tz=tz),
            Timestamp("2008-12-12 09:50:35", tz=tz),
            Timestamp("2009-05-12 09:50:32", tz=tz),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13])
        tm.assert_index_equal(ut.hour, expected)

        # sorted case UTC -> US/Eastern
        ts = [
            Timestamp("2008-05-12 13:50:00", tz="UTC"),
            Timestamp("2008-12-12 14:50:35", tz="UTC"),
            Timestamp("2009-05-12 13:50:32", tz="UTC"),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9])
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case US/Eastern -> UTC
        ts = [
            Timestamp("2008-05-12 09:50:00", tz=tz),
            Timestamp("2008-12-12 09:50:35", tz=tz),
            Timestamp("2008-05-12 09:50:32", tz=tz),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13])
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case UTC -> US/Eastern
        ts = [
            Timestamp("2008-05-12 13:50:00", tz="UTC"),
            Timestamp("2008-12-12 14:50:35", tz="UTC"),
            Timestamp("2008-05-12 13:50:32", tz="UTC"),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9])
        tm.assert_index_equal(ut.hour, expected)
Example #43
    def test_no_millisecond_field(self):
        with self.assertRaises(AttributeError):
            DatetimeIndex.millisecond

        with self.assertRaises(AttributeError):
            DatetimeIndex([]).millisecond
Example #44
    def test_set_index_datetime(self):
        # GH#3950
        df = DataFrame({
            "label": ["a", "a", "a", "b", "b", "b"],
            "datetime": [
                "2011-07-19 07:00:00",
                "2011-07-19 08:00:00",
                "2011-07-19 09:00:00",
                "2011-07-19 07:00:00",
                "2011-07-19 08:00:00",
                "2011-07-19 09:00:00",
            ],
            "value":
            range(6),
        })
        df.index = to_datetime(df.pop("datetime"), utc=True)
        df.index = df.index.tz_convert("US/Pacific")

        expected = DatetimeIndex(
            [
                "2011-07-19 07:00:00", "2011-07-19 08:00:00",
                "2011-07-19 09:00:00"
            ],
            name="datetime",
        )
        expected = expected.tz_localize("UTC").tz_convert("US/Pacific")

        df = df.set_index("label", append=True)
        tm.assert_index_equal(df.index.levels[0], expected)
        tm.assert_index_equal(df.index.levels[1],
                              Index(["a", "b"], name="label"))
        assert df.index.names == ["datetime", "label"]

        df = df.swaplevel(0, 1)
        tm.assert_index_equal(df.index.levels[0],
                              Index(["a", "b"], name="label"))
        tm.assert_index_equal(df.index.levels[1], expected)
        assert df.index.names == ["label", "datetime"]

        df = DataFrame(np.random.random(6))
        idx1 = DatetimeIndex(
            [
                "2011-07-19 07:00:00",
                "2011-07-19 08:00:00",
                "2011-07-19 09:00:00",
                "2011-07-19 07:00:00",
                "2011-07-19 08:00:00",
                "2011-07-19 09:00:00",
            ],
            tz="US/Eastern",
        )
        idx2 = DatetimeIndex(
            [
                "2012-04-01 09:00",
                "2012-04-01 09:00",
                "2012-04-01 09:00",
                "2012-04-02 09:00",
                "2012-04-02 09:00",
                "2012-04-02 09:00",
            ],
            tz="US/Eastern",
        )
        idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo")
        idx3 = idx3._with_freq(None)

        df = df.set_index(idx1)
        df = df.set_index(idx2, append=True)
        df = df.set_index(idx3, append=True)

        expected1 = DatetimeIndex(
            [
                "2011-07-19 07:00:00", "2011-07-19 08:00:00",
                "2011-07-19 09:00:00"
            ],
            tz="US/Eastern",
        )
        expected2 = DatetimeIndex(["2012-04-01 09:00", "2012-04-02 09:00"],
                                  tz="US/Eastern")

        tm.assert_index_equal(df.index.levels[0], expected1)
        tm.assert_index_equal(df.index.levels[1], expected2)
        tm.assert_index_equal(df.index.levels[2], idx3)

        # GH#7092
        tm.assert_index_equal(df.index.get_level_values(0), idx1)
        tm.assert_index_equal(df.index.get_level_values(1), idx2)
        tm.assert_index_equal(df.index.get_level_values(2), idx3)
Example #45
    def test_equals(self):
        # GH 13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)
Example #46
 def test_astype_raises(self, dtype):
     # GH 13149, GH 13209
     idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])
     msg = "Cannot cast DatetimeArray to dtype"
     with pytest.raises(TypeError, match=msg):
         idx.astype(dtype)
Example #47
    def test_dti_tz_nat(self, tzstr):
        idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT])

        assert isna(idx[1])
        assert idx[0].tzinfo is not None
Example #48
    def test_datetime64_tz_fillna(self, tz):
        # DatetimeLikeBlock
        ser = Series([
            Timestamp("2011-01-01 10:00"),
            NaT,
            Timestamp("2011-01-03 10:00"),
            NaT,
        ])
        null_loc = Series([False, True, False, True])

        result = ser.fillna(Timestamp("2011-01-02 10:00"))
        expected = Series([
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00"),
            Timestamp("2011-01-03 10:00"),
            Timestamp("2011-01-02 10:00"),
        ])
        tm.assert_series_equal(expected, result)
        # check ser is not changed
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
        expected = Series([
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00", tz=tz),
            Timestamp("2011-01-03 10:00"),
            Timestamp("2011-01-02 10:00", tz=tz),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna("AAA")
        expected = Series(
            [
                Timestamp("2011-01-01 10:00"),
                "AAA",
                Timestamp("2011-01-03 10:00"),
                "AAA",
            ],
            dtype=object,
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna({
            1: Timestamp("2011-01-02 10:00", tz=tz),
            3: Timestamp("2011-01-04 10:00"),
        })
        expected = Series([
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00", tz=tz),
            Timestamp("2011-01-03 10:00"),
            Timestamp("2011-01-04 10:00"),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna({
            1: Timestamp("2011-01-02 10:00"),
            3: Timestamp("2011-01-04 10:00")
        })
        expected = Series([
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00"),
            Timestamp("2011-01-03 10:00"),
            Timestamp("2011-01-04 10:00"),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        # DatetimeTZBlock
        idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT],
                            tz=tz)
        ser = Series(idx)
        assert ser.dtype == f"datetime64[ns, {tz}]"
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00"))
        expected = Series([
            Timestamp("2011-01-01 10:00", tz=tz),
            Timestamp("2011-01-02 10:00"),
            Timestamp("2011-01-03 10:00", tz=tz),
            Timestamp("2011-01-02 10:00"),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
        idx = DatetimeIndex(
            [
                "2011-01-01 10:00",
                "2011-01-02 10:00",
                "2011-01-03 10:00",
                "2011-01-02 10:00",
            ],
            tz=tz,
        )
        expected = Series(idx)
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(
            Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime())
        idx = DatetimeIndex(
            [
                "2011-01-01 10:00",
                "2011-01-02 10:00",
                "2011-01-03 10:00",
                "2011-01-02 10:00",
            ],
            tz=tz,
        )
        expected = Series(idx)
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna("AAA")
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                "AAA",
                Timestamp("2011-01-03 10:00", tz=tz),
                "AAA",
            ],
            dtype=object,
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna({
            1: Timestamp("2011-01-02 10:00", tz=tz),
            3: Timestamp("2011-01-04 10:00"),
        })
        expected = Series([
            Timestamp("2011-01-01 10:00", tz=tz),
            Timestamp("2011-01-02 10:00", tz=tz),
            Timestamp("2011-01-03 10:00", tz=tz),
            Timestamp("2011-01-04 10:00"),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna({
            1: Timestamp("2011-01-02 10:00", tz=tz),
            3: Timestamp("2011-01-04 10:00", tz=tz),
        })
        expected = Series([
            Timestamp("2011-01-01 10:00", tz=tz),
            Timestamp("2011-01-02 10:00", tz=tz),
            Timestamp("2011-01-03 10:00", tz=tz),
            Timestamp("2011-01-04 10:00", tz=tz),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        # filling with a naive/other zone, coerce to object
        result = ser.fillna(Timestamp("20130101"))
        expected = Series([
            Timestamp("2011-01-01 10:00", tz=tz),
            Timestamp("2013-01-01"),
            Timestamp("2011-01-03 10:00", tz=tz),
            Timestamp("2013-01-01"),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
        expected = Series([
            Timestamp("2011-01-01 10:00", tz=tz),
            Timestamp("2013-01-01", tz="US/Pacific"),
            Timestamp("2011-01-03 10:00", tz=tz),
            Timestamp("2013-01-01", tz="US/Pacific"),
        ])
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)
Example #49
 def test_cdaterange_holidays(self):
     rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01'])
     xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06'])
     tm.assert_index_equal(xp, rng)
Example #50
    def test_dti_tz_localize_ambiguous_flags(self, tz):
        # November 6, 2011, fall back, repeat 2 AM hour

        # Pass in flags to determine right dst transition
        dr = date_range(datetime(2011, 11, 6, 0),
                        periods=5,
                        freq=pd.offsets.Hour(),
                        tz=tz)
        times = [
            "11/06/2011 00:00",
            "11/06/2011 01:00",
            "11/06/2011 01:00",
            "11/06/2011 02:00",
            "11/06/2011 03:00",
        ]

        # Test tz_localize
        di = DatetimeIndex(times)
        is_dst = [1, 1, 0, 0, 0]
        localized = di.tz_localize(tz, ambiguous=is_dst)
        expected = dr._with_freq(None)
        tm.assert_index_equal(expected, localized)
        tm.assert_index_equal(expected,
                              DatetimeIndex(times, tz=tz, ambiguous=is_dst))

        localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
        tm.assert_index_equal(dr, localized)

        localized = di.tz_localize(tz,
                                   ambiguous=np.array(is_dst).astype("bool"))
        tm.assert_index_equal(dr, localized)

        # Test constructor
        localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst)
        tm.assert_index_equal(dr, localized)

        # Test duplicate times where inferring the dst fails
        times += times
        di = DatetimeIndex(times)

        # When the sizes are incompatible, make sure error is raised
        msg = "Length of ambiguous bool-array must be the same size as vals"
        with pytest.raises(Exception, match=msg):
            di.tz_localize(tz, ambiguous=is_dst)

        # When sizes are compatible and there are repeats ('infer' won't work)
        is_dst = np.hstack((is_dst, is_dst))
        localized = di.tz_localize(tz, ambiguous=is_dst)
        dr = dr.append(dr)
        tm.assert_index_equal(dr, localized)

        # When there is no dst transition, nothing special happens
        dr = date_range(datetime(2011, 6, 1, 0),
                        periods=10,
                        freq=pd.offsets.Hour())
        is_dst = np.array([1] * 10)
        localized = dr.tz_localize(tz)
        localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
        tm.assert_index_equal(localized, localized_is_dst)
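A compact illustration of the bool-array form of ambiguous used above: each flag answers "is this wall time the DST (first) occurrence?" for the corresponding element. A minimal sketch:

import pandas as pd

# The repeated hour on the US/Eastern fall-back date.
naive = pd.DatetimeIndex(['11/06/2011 01:00', '11/06/2011 01:00'])

# First element -> DST (EDT, UTC-04:00); second -> standard time (EST, UTC-05:00).
localized = naive.tz_localize('US/Eastern', ambiguous=[True, False])
print(localized)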
Example #51
 def test_cdaterange_weekmask(self):
     rng = cdate_range('2013-05-01',
                       periods=3,
                       weekmask='Sun Mon Tue Wed Thu')
     xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05'])
     tm.assert_index_equal(xp, rng)
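cdate_range was deprecated and later removed from pandas; the same custom business-day ranges are spelled with bdate_range and freq='C', which is what accepts the weekmask and holidays keywords. A hedged sketch reproducing the two custom examples above:

import pandas as pd

# Sun-Thu working week (weekmask/holidays are only valid with a custom freq such as 'C')
pd.bdate_range('2013-05-01', periods=3, freq='C',
               weekmask='Sun Mon Tue Wed Thu')
# -> 2013-05-01, 2013-05-02, 2013-05-05

pd.bdate_range('2013-05-01', periods=3, freq='C', holidays=['2013-05-01'])
# -> 2013-05-02, 2013-05-03, 2013-05-06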
Example #52
    def test_value_counts_inferred(self):
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)
            # don't sort, have to sort after the fact as not sorting is platform-dep
            hist = s.value_counts(sort=False)
            hist.sort()
            expected = Series([3, 1, 4, 2], index=list('acbd'))
            expected.sort()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            res4 = s1.value_counts(bins=4)
            exp4 = Series({
                0.998: 2,
                1.5: 1,
                2.0: 0,
                2.5: 1
            },
                          index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series({
                0.998: 0.5,
                1.5: 0.25,
                2.0: 0.0,
                2.5: 0.25
            },
                           index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(
                s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(), expected)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())

            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00X'
            ])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np.array([
                '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                '2008-09-09 00:00:00Z'
            ],
                                dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass([v for v in s.values] + [pd.NaT])

            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')
            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            self.assertTrue(unique[3] is pd.NaT
                            or unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            td = df.dt - df.dt + timedelta(1)
            td = klass(td)

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')])
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2)
            result2 = td2.value_counts()

            tm.assert_series_equal(result2, expected_s)
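
A minimal standalone sketch of the value_counts/unique/nunique behaviour exercised above. The sample data and aliases are illustrative only, and the exact bin labels returned for bins= vary between pandas versions (float edges in older releases, Interval objects in newer ones):

import numpy as np
import pandas as pd

s = pd.Series(["a", "b", "b", "b", np.nan, "d", "d", "a", "a", "b"])

print(s.value_counts())                 # counts, most frequent first, NaN excluded
print(s.value_counts(normalize=True))   # relative frequencies instead of counts
print(s.value_counts(dropna=False))     # keep a bucket for NaN
print(s.nunique(), s.nunique(dropna=False))

print(pd.Series([1, 1, 2, 3]).value_counts(bins=2))  # counts per value bin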
Example #53
0
    def test_tz_convert_nat(self):
        # GH#5546
        dates = [pd.NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern"))
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC"))

        dates = ["2010-12-01 00:00", "2010-12-02 00:00", pd.NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 03:00", "2010-12-02 03:00", pd.NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))

        idx = idx + pd.offsets.Hour(5)
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
        idx = idx.tz_convert("US/Pacific")
        expected = ["2010-12-01 05:00", "2010-12-02 05:00", pd.NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))

        idx = idx + np.timedelta64(3, "h")
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))

        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 11:00", "2010-12-02 11:00", pd.NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
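
For reference, a small illustrative sketch (not taken from the test suite) of the property this test leans on: NaT passes through tz_localize, tz_convert and offset arithmetic unchanged:

import pandas as pd

idx = pd.DatetimeIndex(["2010-12-01 00:00", pd.NaT]).tz_localize("US/Pacific")
idx = idx.tz_convert("US/Eastern") + pd.offsets.Hour(5)
print(idx)  # the real timestamp is converted and shifted; NaT stays NaT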
Example #54
0
    def test_dti_tz_convert_hour_overflow_dst(self):
        # Regression test for:
        # https://github.com/pandas-dev/pandas/issues/13306

        # sorted case US/Eastern -> UTC
        ts = [
            "2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"
        ]
        tt = DatetimeIndex(ts).tz_localize("US/Eastern")
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13])
        tm.assert_index_equal(ut.hour, expected)

        # sorted case UTC -> US/Eastern
        ts = [
            "2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"
        ]
        tt = DatetimeIndex(ts).tz_localize("UTC")
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9])
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case US/Eastern -> UTC
        ts = [
            "2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"
        ]
        tt = DatetimeIndex(ts).tz_localize("US/Eastern")
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13])
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case UTC -> US/Eastern
        ts = [
            "2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"
        ]
        tt = DatetimeIndex(ts).tz_localize("UTC")
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9])
        tm.assert_index_equal(ut.hour, expected)
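
A compact sketch (with illustrative timestamps) of the hour arithmetic being checked: US/Eastern is UTC-4 during daylight saving time and UTC-5 during standard time, so the same local wall clock converts to different UTC hours depending on the season:

import pandas as pd

tt = pd.DatetimeIndex(["2008-05-12 09:50", "2008-12-12 09:50"]).tz_localize("US/Eastern")
print(tt.tz_convert("UTC").hour)  # 13 for the May timestamp, 14 for the December one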
Example #55
0
    def test_intersection(self, tz, sort):
        # GH 4690 (with tz)
        base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")

        # if target has the same name, it is preserved
        rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
        expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")

        # if target name is different, it will be reset
        rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
        expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)

        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
        expected4 = DatetimeIndex([], freq="D", name="idx")

        for (rng, expected) in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng)
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

        # non-monotonic
        base = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
            tz=tz,
            name="idx")

        rng2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            tz=tz,
            name="idx")
        expected2 = DatetimeIndex(["2011-01-04", "2011-01-02"],
                                  tz=tz,
                                  name="idx")

        rng3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            tz=tz,
            name="other",
        )
        expected3 = DatetimeIndex(["2011-01-04", "2011-01-02"],
                                  tz=tz,
                                  name=None)

        # GH 7880
        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
        expected4 = DatetimeIndex([], tz=tz, name="idx")
        assert expected4.freq is None

        for (rng, expected) in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq
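
A short illustrative sketch of the name rules DatetimeIndex.intersection follows (the ranges and names here are arbitrary):

import pandas as pd

base = pd.date_range("2000-06-01", "2000-06-30", freq="D", name="idx")
other = pd.date_range("2000-05-15", "2000-06-20", freq="D", name="idx")

result = base.intersection(other)
print(result.name, result.freq)          # matching names are kept; the daily freq survives

renamed = other.rename("other")
print(base.intersection(renamed).name)   # differing names reset the result name to None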
Example #56
0
    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp('2011-01-01'),
             Timestamp('2011-01-02')], name='idx')
        exp = DatetimeIndex([Timestamp('2011-01-01'),
                             Timestamp('2011-01-02')],
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # same tz results in DatetimeIndex
        result = DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
            Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
        ],
                               name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01 10:00'),
             Timestamp('2011-01-02 10:00')],
            tz='Asia/Tokyo',
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='US/Eastern'),
            Timestamp('2011-08-01 10:00', tz='US/Eastern')
        ],
                               name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01 10:00'),
             Timestamp('2011-08-01 10:00')],
            tz='US/Eastern',
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # different tz coerces tz-naive to tz-aware
        result = DatetimeIndex([
            Timestamp('2011-01-01 10:00'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                               name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01 05:00'),
             Timestamp('2011-01-02 10:00')],
            tz='US/Eastern',
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # a tz mismatch with already tz-aware data raises TypeError/ValueError

        with tm.assertRaises(ValueError):
            DatetimeIndex([
                Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                          name='idx')

        with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
            DatetimeIndex([
                Timestamp('2011-01-01 10:00'),
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                          tz='Asia/Tokyo',
                          name='idx')

        with tm.assertRaises(ValueError):
            DatetimeIndex([
                Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                          tz='US/Eastern',
                          name='idx')

        with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
            # passing tz should result in a DatetimeIndex, and the tz mismatch
            # then raises TypeError
            Index([
                pd.NaT,
                Timestamp('2011-01-01 10:00'), pd.NaT,
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                  tz='Asia/Tokyo',
                  name='idx')
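
How mixed time zone inputs are handled has shifted across pandas versions, so the sketch below (illustrative values only) sticks to two stable cases: Timestamps that share a tz build a tz-aware index directly, and genuinely mixed offsets are easiest to combine through to_datetime(..., utc=True):

import pandas as pd
from pandas import Timestamp

# same tz on every Timestamp -> tz-aware DatetimeIndex
idx = pd.DatetimeIndex([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                        Timestamp("2011-01-02 10:00", tz="Asia/Tokyo")])
print(idx.tz)

# mixed offsets: convert everything to UTC rather than relying on coercion rules
mixed = [Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
         Timestamp("2011-01-02 10:00", tz="US/Eastern")]
print(pd.to_datetime(mixed, utc=True))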
Example #57
0
 def test_nat(self):
     self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT)
Example #58
0
 def test_dti_convert_datetime_list(self, tzstr):
     dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo")
     dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
     tm.assert_index_equal(dr, dr2)
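
The same round trip as a standalone snippet (values are illustrative): a tz-aware range survives being rebuilt from a plain Python list of Timestamps:

import pandas as pd

dr = pd.date_range("2012-06-02", periods=3, tz="US/Eastern", name="foo")
rebuilt = pd.DatetimeIndex(list(dr), name="foo", freq="D")
assert rebuilt.equals(dr)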
Example #59
0
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            ]
        )

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)
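
A small sketch of the repeat behaviour asserted in the two tests above (the range is arbitrary): elements are tiled in place and the regular frequency is dropped, because the repeated index is no longer evenly spaced:

import numpy as np
import pandas as pd

rng = pd.date_range("2016-01-01", periods=2, freq="30min")
rep = rng.repeat(2)
print(rep)
print(rep.freq)                 # None once elements repeat
print(np.repeat(rng, 2).freq)   # np.repeat dispatches to the same Index method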

    def test_resolution(self, tz_naive_fixture):
        tz = tz_naive_fixture
        for freq, expected in zip(
            ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
            [
                "day",
                "day",
                "day",
                "day",
                "hour",
                "minute",
                "second",
                "millisecond",
                "microsecond",
            ],
        ):
            idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
            assert idx.resolution == expected
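
For instance (illustrative frequencies only), the resolution string reflects the finest unit implied by the frequency:

import pandas as pd

for freq in ["D", "H", "T", "S"]:
    idx = pd.date_range("2013-04-01", periods=3, freq=freq)
    print(freq, idx.resolution)   # 'day', 'hour', 'minute', 'second'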

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values: the n-th element is repeated n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:

            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None
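
A standalone sketch (arbitrary dates) of the sort_values(return_indexer=True) pattern both ordering tests rely on: the indexer is the positional permutation that maps the original index onto the sorted one, and sorting does not re-infer a frequency:

import pandas as pd

idx = pd.DatetimeIndex(["2011-01-03", "2011-01-01", "2011-01-05", "2011-01-02"])
ordered, indexer = idx.sort_values(return_indexer=True)
print(ordered.freq)                        # None: sorting does not infer a freq
assert idx.take(indexer).equals(ordered)   # indexer reorders idx into ordered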

    def test_drop_duplicates_metadata(self, freq_sample):
        # GH 10115
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None
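
A small illustrative sketch of the metadata rule tested here: appending an index to itself drops the freq, and drop_duplicates returns the original values without restoring it:

import pandas as pd

idx = pd.date_range("2011-01-01", periods=5, freq="D", name="idx")
dup = idx.append(idx)
print(dup.freq)                            # None: the concatenation is not regular
deduped = dup.drop_duplicates()
print(deduped.equals(idx), deduped.freq)   # True, but the freq stays None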

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        # to check Index/Series compat
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample
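
As a standalone illustration (not part of the test suite), the same inference is available through the public pd.infer_freq helper and the freq="infer" constructor argument:

import pandas as pd

idx = pd.date_range("2011-01-01 09:00", periods=5, freq="2H")
print(pd.infer_freq(idx))        # the 2-hour rule is recovered from the spacing

rebuilt = pd.DatetimeIndex(list(idx.strftime("%Y-%m-%d %H:%M")), freq="infer")
print(rebuilt.freq)              # inferred again while constructing from strings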

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)
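
A brief sketch of the equality semantics being pinned down (values are arbitrary): equals compares values, tolerates an object-dtype view of the same data, and treats differing time zones or non-Index containers as unequal:

import pandas as pd

naive = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
aware = naive.tz_localize("US/Pacific")

assert naive.equals(naive.copy())
assert naive.equals(naive.astype(object))   # object-dtype view of the same values
assert not naive.equals(aware)              # same wall clock, different tz
assert not naive.equals(list(naive))        # a plain list is never equal to an Index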

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        #  for another that views the same data

        dti = pd.date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
Example #60
0
    def test_order(self):
        # with freq
        idx1 = DatetimeIndex(['2011-01-01', '2011-01-02',
                              '2011-01-03'], freq='D', name='idx')
        idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                              '2011-01-01 11:00'], freq='H',
                             tz='Asia/Tokyo', name='tzidx')

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            self.assert_index_equal(ordered, idx)
            self.assertEqual(ordered.freq, idx.freq)

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            self.assert_index_equal(ordered, expected)
            self.assertEqual(ordered.freq, expected.freq)
            self.assertEqual(ordered.freq.n, -1)

            ordered, indexer = idx.sort_values(return_indexer=True)
            self.assert_index_equal(ordered, idx)
            self.assert_numpy_array_equal(indexer,
                                          np.array([0, 1, 2]),
                                          check_dtype=False)
            self.assertEqual(ordered.freq, idx.freq)

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            expected = idx[::-1]
            self.assert_index_equal(ordered, expected)
            self.assert_numpy_array_equal(indexer,
                                          np.array([2, 1, 0]),
                                          check_dtype=False)
            self.assertEqual(ordered.freq, expected.freq)
            self.assertEqual(ordered.freq.n, -1)

        # without freq
        for tz in self.tz:
            idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                                  '2011-01-02', '2011-01-01'],
                                 tz=tz, name='idx1')
            exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                                  '2011-01-03', '2011-01-05'],
                                 tz=tz, name='idx1')

            idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                                  '2011-01-02', '2011-01-01'],
                                 tz=tz, name='idx2')

            exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                                  '2011-01-03', '2011-01-05'],
                                 tz=tz, name='idx2')

            idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05',
                                  '2011-01-02', pd.NaT], tz=tz, name='idx3')
            exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
                                  '2011-01-05'], tz=tz, name='idx3')

            for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
                ordered = idx.sort_values()
                self.assert_index_equal(ordered, expected)
                self.assertIsNone(ordered.freq)

                ordered = idx.sort_values(ascending=False)
                self.assert_index_equal(ordered, expected[::-1])
                self.assertIsNone(ordered.freq)

                ordered, indexer = idx.sort_values(return_indexer=True)
                self.assert_index_equal(ordered, expected)

                exp = np.array([0, 4, 3, 1, 2])
                self.assert_numpy_array_equal(indexer, exp, check_dtype=False)
                self.assertIsNone(ordered.freq)

                ordered, indexer = idx.sort_values(return_indexer=True,
                                                   ascending=False)
                self.assert_index_equal(ordered, expected[::-1])

                exp = np.array([2, 1, 3, 4, 0])
                self.assert_numpy_array_equal(indexer, exp, check_dtype=False)
                self.assertIsNone(ordered.freq)