Example #1
class TimestampOps(object):
    goal_time = 0.2

    def setup(self):
        self.ts = Timestamp('2017-08-25 08:16:14')
        self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')

        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)

    def time_replace_tz(self):
        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

    def time_replace_across_dst(self):
        self.ts2.replace(tzinfo=self.tzinfo)

    def time_replace_None(self):
        self.ts_tz.replace(tzinfo=None)

    def time_to_pydatetime(self):
        self.ts.to_pydatetime()

    def time_to_pydatetime_tz(self):
        self.ts_tz.to_pydatetime()
Example #2
 def test_timestamp_tz_localize_nonexistent_raise(self, tz):
     # GH 8917
     ts = Timestamp('2015-03-29 02:20:00')
     with pytest.raises(pytz.NonExistentTimeError):
         ts.tz_localize(tz, nonexistent='raise')
     with pytest.raises(ValueError):
         ts.tz_localize(tz, nonexistent='foo')
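
A minimal standalone sketch (not part of the test suite; assumes pandas >= 0.24 and pytz) of how the different nonexistent options behave for a wall time that falls in Europe/Warsaw's spring-forward gap:

import pandas as pd
import pytz

ts = pd.Timestamp('2015-03-29 02:20:00')  # inside the DST gap for Europe/Warsaw
try:
    ts.tz_localize('Europe/Warsaw', nonexistent='raise')
except pytz.NonExistentTimeError:
    print('nonexistent time raised, as the test above expects')
print(ts.tz_localize('Europe/Warsaw', nonexistent='NaT'))            # NaT
print(ts.tz_localize('Europe/Warsaw', nonexistent='shift_forward'))  # 2015-03-29 03:00:00+02:00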
Example #3
    def test_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day_name and month_name should
            # return the English attributes
            expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                             'Friday', 'Saturday', 'Sunday']
            expected_months = ['January', 'February', 'March', 'April', 'May',
                               'June', 'July', 'August', 'September',
                               'October', 'November', 'December']
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = pd.date_range(freq='D', start=datetime(1998, 1, 1),
                            periods=365)
        english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                        'Friday', 'Saturday', 'Sunday']
        for day, name, eng_name in zip(range(4, 11),
                                       expected_days,
                                       english_days):
            name = name.capitalize()
            assert dti.weekday_name[day] == eng_name
            assert dti.day_name(locale=time_locale)[day] == name
            ts = Timestamp(datetime(2016, 4, day))
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                assert ts.weekday_name == eng_name
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(pd.NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = pd.date_range(freq='M', start='2012', end='2013')
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes
        # https://github.com/pandas-dev/pandas/issues/22342
        if not compat.PY2:
            result = result.str.normalize("NFD")
            expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for date, expected in zip(dti, expected_months):
            result = date.month_name(locale=time_locale)
            expected = expected.capitalize()

            if not compat.PY2:
                result = unicodedata.normalize("NFD", result)
                expected = unicodedata.normalize("NFD", expected)

            assert result == expected
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])
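
As a quick standalone illustration of the accessors exercised above (a sketch assuming the default English locale):

import pandas as pd

ts = pd.Timestamp('2016-04-04')          # a Monday
print(ts.day_name())                     # 'Monday'
print(ts.month_name())                   # 'April'

dti = pd.date_range('2012-01-31', periods=3, freq='M')
print(dti.month_name().tolist())         # ['January', 'February', 'March']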
Example #4
class TimestampProperties:
    _tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
            dateutil.tz.tzutc()]
    _freqs = [None, 'B']
    params = [_tzs, _freqs]
    param_names = ['tz', 'freq']

    def setup(self, tz, freq):
        self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz, freq=freq)

    def time_tz(self, tz, freq):
        self.ts.tz

    def time_dayofweek(self, tz, freq):
        self.ts.dayofweek

    def time_weekday_name(self, tz, freq):
        self.ts.day_name()

    def time_dayofyear(self, tz, freq):
        self.ts.dayofyear

    def time_week(self, tz, freq):
        self.ts.week

    def time_quarter(self, tz, freq):
        self.ts.quarter

    def time_days_in_month(self, tz, freq):
        self.ts.days_in_month

    def time_freqstr(self, tz, freq):
        self.ts.freqstr

    def time_is_month_start(self, tz, freq):
        self.ts.is_month_start

    def time_is_month_end(self, tz, freq):
        self.ts.is_month_end

    def time_is_quarter_start(self, tz, freq):
        self.ts.is_quarter_start

    def time_is_quarter_end(self, tz, freq):
        self.ts.is_quarter_end

    def time_is_year_start(self, tz, freq):
        self.ts.is_year_start

    def time_is_year_end(self, tz, freq):
        self.ts.is_year_end

    def time_is_leap_year(self, tz, freq):
        self.ts.is_leap_year

    def time_microsecond(self, tz, freq):
        self.ts.microsecond

    def time_month_name(self, tz, freq):
        self.ts.month_name()
Example #5
    def test_timestamp_tz_localize(self, tz):
        stamp = Timestamp('3/11/2012 04:00')

        result = stamp.tz_localize(tz)
        expected = Timestamp('3/11/2012 04:00', tz=tz)
        assert result.hour == expected.hour
        assert result == expected
Example #6
 def test_replace_preserves_nanos(self, tz_aware_fixture):
     tz = tz_aware_fixture
     # GH#14621, GH#7825
     ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
     result = ts.replace(hour=0)
     expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
     assert result == expected
Example #7
 def test_timestamp_to_datetime_tzoffset(self):
     #tzoffset
     from dateutil.tz import tzoffset
     tzinfo = tzoffset(None, 7200)
     expected = Timestamp('3/11/2012 04:00', tz=tzinfo)
     result = Timestamp(expected.to_datetime())
     self.assertEqual(expected, result)
Example #8
    def setup(self):
        self.ts = Timestamp('2017-08-25 08:16:14')
        self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')

        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)
Example #9
 def test_tz_localize_errors_invalid_arg(self):
     # GH 22644
     tz = 'Europe/Warsaw'
     ts = Timestamp('2015-03-29 02:00:00')
     with pytest.raises(ValueError):
         with tm.assert_produces_warning(FutureWarning):
             ts.tz_localize(tz, errors='foo')
Example #10
 def test_constructor_strptime(self):
     # GH25016
     # Test support for Timestamp.strptime
     fmt = '%Y%m%d-%H%M%S-%f%z'
     ts = '20190129-235348-000001+0000'
     with pytest.raises(NotImplementedError):
         Timestamp.strptime(ts, fmt)
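
Because Timestamp.strptime is intentionally unimplemented, a hedged sketch of parsing the same string with pandas.to_datetime instead (assumes a pandas version whose to_datetime understands %z):

import pandas as pd

fmt = '%Y%m%d-%H%M%S-%f%z'
ts = '20190129-235348-000001+0000'
print(pd.to_datetime(ts, format=fmt))    # 2019-01-29 23:53:48.000001+00:00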
Example #11
 def test_replace_aware(self, tz):
     # GH#14621, GH#7825
     # replacing datetime components with and w/o presence of a timezone
     ts = Timestamp('2016-01-01 09:00:00', tz=tz)
     result = ts.replace(hour=0)
     expected = Timestamp('2016-01-01 00:00:00', tz=tz)
     assert result == expected
Example #12
    def test_names(self, data, time_locale):
        # GH 17354
        # Test .weekday_name, .day_name(), .month_name
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            assert data.weekday_name == 'Monday'
        if time_locale is None:
            expected_day = 'Monday'
            expected_month = 'August'
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_day = calendar.day_name[0].capitalize()
                expected_month = calendar.month_name[8].capitalize()

        result_day = data.day_name(time_locale)
        result_month = data.month_name(time_locale)

        # Work around https://github.com/pandas-dev/pandas/issues/22342
        # different normalizations

        if not PY2:
            expected_day = unicodedata.normalize("NFD", expected_day)
            expected_month = unicodedata.normalize("NFD", expected_month)

            result_day = unicodedata.normalize("NFD", result_day)
            result_month = unicodedata.normalize("NFD", result_month)

        assert result_day == expected_day
        assert result_month == expected_month

        # Test NaT
        nan_ts = Timestamp(NaT)
        assert np.isnan(nan_ts.day_name(time_locale))
        assert np.isnan(nan_ts.month_name(time_locale))
Example #13
 def test_to_period_tz_warning(self):
     # GH#21333 make sure a warning is issued when timezone
     # info is lost
     ts = Timestamp('2009-04-15 16:17:18', tz='US/Eastern')
     with tm.assert_produces_warning(UserWarning):
         # warning that timezone info will be lost
         ts.to_period('D')
Example #14
    def test_timestamp_tz_localize_explicit(self):
        stamp = Timestamp("3/11/2012 04:00")

        result = stamp.tz_localize(self.tz("US/Eastern"))
        expected = Timestamp("3/11/2012 04:00", tz=self.tz("US/Eastern"))
        self.assertEqual(result.hour, expected.hour)
        self.assertEqual(result, expected)
Example #15
    def test_cant_compare_tz_naive_w_aware(self, utc_fixture):
        # see GH#1404
        a = Timestamp('3/12/2012')
        b = Timestamp('3/12/2012', tz=utc_fixture)

        with pytest.raises(TypeError):
            a == b
        with pytest.raises(TypeError):
            a != b
        with pytest.raises(TypeError):
            a < b
        with pytest.raises(TypeError):
            a <= b
        with pytest.raises(TypeError):
            a > b
        with pytest.raises(TypeError):
            a >= b

        with pytest.raises(TypeError):
            b == a
        with pytest.raises(TypeError):
            b != a
        with pytest.raises(TypeError):
            b < a
        with pytest.raises(TypeError):
            b <= a
        with pytest.raises(TypeError):
            b > a
        with pytest.raises(TypeError):
            b >= a

        assert not a == b.to_pydatetime()
        assert not a.to_pydatetime() == b
Example #16
    def test_timestamp_tz_localize(self):
        stamp = Timestamp('3/11/2012 04:00')

        result = stamp.tz_localize('US/Eastern')
        expected = Timestamp('3/11/2012 04:00', tz='US/Eastern')
        self.assertEqual(result.hour, expected.hour)
        self.assertEqual(result, expected)
Example #17
class TimestampAcrossDst:
    def setup(self):
        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)

    def time_replace_across_dst(self):
        self.ts2.replace(tzinfo=self.tzinfo)
Example #18
    def test_tz_convert_roundtrip(self, stamp, tz):
        ts = Timestamp(stamp, tz='UTC')
        converted = ts.tz_convert(tz)

        reset = converted.tz_convert(None)
        assert reset == Timestamp(stamp)
        assert reset.tzinfo is None
        assert reset == converted.tz_convert('UTC').tz_localize(None)
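
A compact sketch (illustrative only) of the localize/convert round trip this test relies on:

import pandas as pd

naive = pd.Timestamp('2011-01-01 09:00')
aware = naive.tz_localize('US/Eastern')   # attach a zone to a naive stamp
print(aware)                              # 2011-01-01 09:00:00-05:00
print(aware.tz_convert('UTC'))            # 2011-01-01 14:00:00+00:00
print(aware.tz_convert(None))             # convert to UTC, then drop the zone: 14:00 naive
print(aware.tz_localize(None))            # drop the zone, keep wall time: 09:00 naive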
Example #19
 def test_astimezone(self, tzstr):
     # astimezone is an alias for tz_convert, so keep it with
     # the tz_convert tests
     utcdate = Timestamp('3/11/2012 22:00', tz='UTC')
     expected = utcdate.tz_convert(tzstr)
     result = utcdate.astimezone(tzstr)
     assert expected == result
     assert isinstance(result, Timestamp)
Example #20
 def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset,
                                                          tz_type):
     # GH 8917, 24466
     tz = tz_type + 'Europe/Warsaw'
     ts = Timestamp('2015-03-29 02:20:00')
     msg = "The provided timedelta will relocalize on a nonexistent time"
     with pytest.raises(ValueError, match=msg):
         ts.tz_localize(tz, nonexistent=timedelta(seconds=offset))
Example #21
 def test_replace_dst_fold(self, fold, tz):
     # GH 25017
     d = datetime(2019, 10, 27, 2, 30)
     ts = Timestamp(d, tz=tz)
     result = ts.replace(hour=1, fold=fold)
     expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize(
         tz, ambiguous=not fold
     )
     assert result == expected
Example #22
 def test_replace_multiple(self, tz):
     # GH#14621, GH#7825
     # replacing datetime components with and w/o presence of a timezone
     # test all
     ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
     result = ts.replace(year=2015, month=2, day=2, hour=0, minute=5,
                         second=5, microsecond=5, nanosecond=5)
     expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
     assert result == expected
Example #23
 def test_tz_localize_errors_coerce(self):
     # GH 22644
     # make sure errors='coerce' gets mapped correctly to nonexistent
     tz = 'Europe/Warsaw'
     ts = Timestamp('2015-03-29 02:00:00')
     with tm.assert_produces_warning(FutureWarning):
         result = ts.tz_localize(tz, errors='coerce')
     expected = ts.tz_localize(tz, nonexistent='NaT')
     assert result is expected
Example #24
    def test_round_tzaware(self):
        dt = Timestamp('20130101 09:10:11', tz='US/Eastern')
        result = dt.round('D')
        expected = Timestamp('20130101', tz='US/Eastern')
        assert result == expected

        dt = Timestamp('20130101 09:10:11', tz='US/Eastern')
        result = dt.round('s')
        assert result == dt
Example #25
    def test_to_pydatetime_nonzero_nano(self):
        ts = Timestamp('2011-01-01 9:00:00.123456789')

        # Warn the user of data loss (nanoseconds).
        with tm.assert_produces_warning(UserWarning,
                                        check_stacklevel=False):
            expected = datetime(2011, 1, 1, 9, 0, 0, 123456)
            result = ts.to_pydatetime()
            assert result == expected
Example #26
    def __init__(self, site, start, end, savepath='data'):
        self.site = site
        self.start = Timestamp(start)
        self.end = Timestamp(end)
        self.savepath = Path(savepath)

        self._daily_json = None
        self._insta_json = None
        self._daily_data = None
        self._insta_data = None
Example #27
    def test_tz_localize_roundtrip(self, stamp, tz):
        ts = Timestamp(stamp)
        localized = ts.tz_localize(tz)
        assert localized == Timestamp(stamp, tz=tz)

        with pytest.raises(TypeError):
            localized.tz_localize(tz)

        reset = localized.tz_localize(None)
        assert reset == ts
        assert reset.tzinfo is None
Example #28
    def __init__(
        self,
        name,
        year=None,
        month=None,
        day=None,
        offset=None,
        observance=None,
        start_date=None,
        end_date=None,
        days_of_week=None,
    ):
        """
        Parameters
        ----------
        name : str
            Name of the holiday, defaults to class name
        offset : array of pandas.tseries.offsets or
                 class from pandas.tseries.offsets
            computes offset from date
        observance : function
            computes when holiday is given a pandas Timestamp
        days_of_week : tuple of int
            provide a tuple of days, e.g. (0, 1, 2, 3) for Monday through Thursday
            Monday=0, ..., Sunday=6

        Examples
        --------
        >>> from pandas.tseries.holiday import Holiday, nearest_workday
        >>> from pandas import DateOffset
        >>> from dateutil.relativedelta import MO
        >>> USMemorialDay = Holiday('MemorialDay', month=5, day=24,
        ...                         offset=DateOffset(weekday=MO(1)))
        >>> USLaborDay = Holiday('Labor Day', month=9, day=1,
        ...                      offset=DateOffset(weekday=MO(1)))
        >>> July3rd = Holiday('July 3rd', month=7, day=3)
        >>> NewYears = Holiday('New Years Day', month=1, day=1,
        ...                    observance=nearest_workday)
        >>> July3rd = Holiday('July 3rd', month=7, day=3,
        ...                   days_of_week=(0, 1, 2, 3))
        """
        if offset is not None and observance is not None:
            raise NotImplementedError("Cannot use both offset and observance.")

        self.name = name
        self.year = year
        self.month = month
        self.day = day
        self.offset = offset
        self.start_date = Timestamp(start_date) if start_date is not None else start_date
        self.end_date = Timestamp(end_date) if end_date is not None else end_date
        self.observance = observance
        assert days_of_week is None or type(days_of_week) == tuple
        self.days_of_week = days_of_week
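
A possible usage sketch for the constructor above (illustrative, not library documentation); Holiday.dates materializes the observed dates within a range:

from pandas.tseries.holiday import Holiday, nearest_workday

july4 = Holiday('July 4th', month=7, day=4, observance=nearest_workday)
# Saturday 2015-07-04 is observed on Friday 2015-07-03; 2016 and 2017 fall on weekdays.
print(july4.dates('2015-01-01', '2017-12-31'))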
Example #29
 def test_timestamp_tz_localize_nonexistent_shift(self, start_ts, tz,
                                                  end_ts, shift,
                                                  tz_type):
     # GH 8917, 24466
     tz = tz_type + tz
     if isinstance(shift, str):
         shift = 'shift_' + shift
     ts = Timestamp(start_ts)
     result = ts.tz_localize(tz, nonexistent=shift)
     expected = Timestamp(end_ts).tz_localize(tz)
     assert result == expected
Example #30
    def test_tz_convert_utc_with_system_utc(self):
        from pandas._libs.tslibs.timezones import maybe_get_tz

        # from system utc to real utc
        ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC'))
        # check that the time hasn't changed.
        assert ts == ts.tz_convert(dateutil.tz.tzutc())

        # from system utc to real utc
        ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC'))
        # check that the time hasn't changed.
        assert ts == ts.tz_convert(dateutil.tz.tzutc())
Example #31
class TestDatetimeIndex:
    @pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence])
    def test_freq_validation_with_nat(self, dt_cls):
        # GH#11587 make sure we get a useful error message when generate_range
        #  raises
        msg = (
            "Inferred frequency None from passed values does not conform "
            "to passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            dt_cls([pd.NaT, pd.Timestamp("2011-01-01")], freq="D")
        with pytest.raises(ValueError, match=msg):
            dt_cls([pd.NaT, pd.Timestamp("2011-01-01").value], freq="D")

    def test_categorical_preserves_tz(self):
        # GH#18664 retain tz when going DTI-->Categorical-->DTI
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works

        dti = pd.DatetimeIndex(
            [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
        )

        ci = pd.CategoricalIndex(dti)
        carr = pd.Categorical(dti)
        cser = pd.Series(ci)

        for obj in [ci, carr, cser]:
            result = pd.DatetimeIndex(obj)
            tm.assert_index_equal(result, dti)

    def test_dti_with_period_data_raises(self):
        # GH#23675
        data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            DatetimeIndex(data)

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            to_datetime(data)

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            DatetimeIndex(period_array(data))

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            to_datetime(period_array(data))

    def test_dti_with_timedelta64_data_raises(self):
        # GH#23675 deprecated, enforced in GH#29794
        data = np.array([0], dtype="m8[ns]")
        msg = r"timedelta64\[ns\] cannot be converted to datetime64"
        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(data)

        with pytest.raises(TypeError, match=msg):
            to_datetime(data)

        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(pd.TimedeltaIndex(data))

        with pytest.raises(TypeError, match=msg):
            to_datetime(pd.TimedeltaIndex(data))

    def test_construction_caching(self):

        df = pd.DataFrame(
            {
                "dt": pd.date_range("20130101", periods=3),
                "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
                "dt_with_null": [
                    pd.Timestamp("20130101"),
                    pd.NaT,
                    pd.Timestamp("20130103"),
                ],
                "dtns": pd.date_range("20130101", periods=3, freq="ns"),
            }
        )
        assert df.dttz.dtype.tz.zone == "US/Eastern"

    @pytest.mark.parametrize(
        "kwargs",
        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    )
    def test_construction_with_alt(self, kwargs, tz_aware_fixture):
        tz = tz_aware_fixture
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
        result = DatetimeIndex(i, **kwargs)
        tm.assert_index_equal(i, result)

    @pytest.mark.parametrize(
        "kwargs",
        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    )
    def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
        tz = tz_aware_fixture
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}

        if "tz" in kwargs:
            result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"])

            expected = DatetimeIndex(i, **kwargs)
            tm.assert_index_equal(result, expected)

        # localize into the provided tz
        i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
        expected = i.tz_localize(None).tz_localize("UTC")
        tm.assert_index_equal(i2, expected)

        # incompat tz/dtype
        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific")

    def test_construction_index_with_mixed_timezones(self):
        # gh-11488: no tz results in DatetimeIndex
        result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx")
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # same tz results in DatetimeIndex
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # Different tz results in Index(dtype=object)
        result = Index(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # length = 1
        result = Index([Timestamp("2011-01-01")], name="idx")
        exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # length = 1 with tz
        result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx")
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # see gh-11488
        result = Index(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
            name="idx",
        )
        exp = DatetimeIndex(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # Same tz results in DatetimeIndex
        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00"),
            ],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                pd.NaT,
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # different tz results in Index(dtype=object)
        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # all NaT
        result = Index([pd.NaT, pd.NaT], name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")

        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex
        result = DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # a tz mismatch among tz-aware Timestamps raises TypeError/ValueError

        with pytest.raises(ValueError):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                name="idx",
            )

        msg = "cannot be converted to datetime64"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )

        with pytest.raises(ValueError):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="US/Eastern",
                name="idx",
            )

        with pytest.raises(ValueError, match=msg):
            # passing tz should result in a DatetimeIndex, then the mismatch raises
            # TypeError
            Index(
                [
                    pd.NaT,
                    Timestamp("2011-01-01 10:00"),
                    pd.NaT,
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )

    def test_construction_base_constructor(self):
        arr = [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")]
        tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr)))

        arr = [np.nan, pd.NaT, pd.Timestamp("2011-01-03")]
        tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr)))

    def test_construction_outofbounds(self):
        # GH 13663
        dates = [
            datetime(3000, 1, 1),
            datetime(4000, 1, 1),
            datetime(5000, 1, 1),
            datetime(6000, 1, 1),
        ]
        exp = Index(dates, dtype=object)
        # coerces to object
        tm.assert_index_equal(Index(dates), exp)

        with pytest.raises(OutOfBoundsDatetime):
            # can't create DatetimeIndex
            DatetimeIndex(dates)

    def test_construction_with_ndarray(self):
        # GH 5152
        dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)]
        data = DatetimeIndex(dates, freq=pd.offsets.BDay()).values
        result = DatetimeIndex(data, freq=pd.offsets.BDay())
        expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
        tm.assert_index_equal(result, expected)

    def test_integer_values_and_tz_interpreted_as_utc(self):
        # GH-24559
        val = np.datetime64("2000-01-01 00:00:00", "ns")
        values = np.array([val.view("i8")])

        result = DatetimeIndex(values).tz_localize("US/Central")

        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
        tm.assert_index_equal(result, expected)

        # but UTC is *not* deprecated.
        with tm.assert_produces_warning(None):
            result = DatetimeIndex(values, tz="UTC")
        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        rng = date_range("1/1/2000", periods=10.5)
        exp = date_range("1/1/2000", periods=10)
        tm.assert_index_equal(rng, exp)

        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            date_range(start="1/1/2000", periods="foo", freq="D")

        with pytest.raises(TypeError):
            DatetimeIndex("1/1/2000")

        # generator expression
        gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
        result = DatetimeIndex(gen)
        expected = DatetimeIndex(
            [datetime(2000, 1, 1) + timedelta(i) for i in range(10)]
        )
        tm.assert_index_equal(result, expected)

        # NumPy string array
        strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # string with NaT
        strings = np.array(["2000-01-01", "2000-01-02", "NaT"])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # non-conforming
        msg = (
            "Inferred frequency None from passed values does not conform"
            " to passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")

        msg = (
            "Of the four parameters: start, end, periods, and freq, exactly"
            " three must be specified"
        )
        with pytest.raises(ValueError, match=msg):
            date_range(start="2011-01-01", freq="b")
        with pytest.raises(ValueError, match=msg):
            date_range(end="2011-01-01", freq="B")
        with pytest.raises(ValueError, match=msg):
            date_range(periods=10, freq="D")

    @pytest.mark.parametrize("freq", ["AS", "W-SUN"])
    def test_constructor_datetime64_tzformat(self, freq):
        # see GH#6572: ISO 8601 format results in pytz.FixedOffset
        idx = date_range(
            "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq
        )
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(-300),
        )
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        idx = date_range(
            "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq
        )
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(540),
        )
        tm.assert_index_equal(idx, expected)
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        # Non ISO 8601 format results in dateutil.tz.tzoffset
        idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq)
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(-300),
        )
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq)
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(540),
        )
        tm.assert_index_equal(idx, expected)
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

    def test_constructor_dtype(self):

        # passing a dtype with a tz should localize
        idx = DatetimeIndex(
            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
        )
        expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern")
        tm.assert_index_equal(idx, expected)

        idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern")
        tm.assert_index_equal(idx, expected)

        # if we already have a tz and its not the same, then raise
        idx = DatetimeIndex(
            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
        )

        msg = (
            "cannot supply both a tz and a timezone-naive dtype"
            r" \(i\.e\. datetime64\[ns\]\)"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns]")

        # this is effectively trying to convert tz's
        msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns, CET]")
        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]")

        result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]")
        tm.assert_index_equal(idx, result)

    @pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
    def test_constructor_invalid_dtype_raises(self, dtype):
        # GH 23986
        with pytest.raises(ValueError):
            DatetimeIndex([1, 2], dtype=dtype)

    def test_constructor_name(self):
        idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST")
        assert idx.name == "TEST"

    def test_000constructor_resolution(self):
        # 2252
        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
        idx = DatetimeIndex([t1])

        assert idx.nanosecond[0] == t1.nanosecond

    def test_disallow_setting_tz(self):
        # GH 3746
        dti = DatetimeIndex(["2010"], tz="UTC")
        with pytest.raises(AttributeError):
            dti.tz = pytz.timezone("US/Pacific")

    @pytest.mark.parametrize(
        "tz",
        [
            None,
            "America/Los_Angeles",
            pytz.timezone("America/Los_Angeles"),
            Timestamp("2000", tz="America/Los_Angeles").tz,
        ],
    )
    def test_constructor_start_end_with_tz(self, tz):
        # GH 18595
        start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles")
        end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles")
        result = date_range(freq="D", start=start, end=end, tz=tz)
        expected = DatetimeIndex(
            ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], tz="America/Los_Angeles"
        )
        tm.assert_index_equal(result, expected)
        # Especially assert that the timezone is consistent for pytz
        assert pytz.timezone("America/Los_Angeles") is result.tz

    @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
    def test_constructor_with_non_normalized_pytz(self, tz):
        # GH 18595
        non_norm_tz = Timestamp("2010", tz=tz).tz
        result = DatetimeIndex(["2010"], tz=non_norm_tz)
        assert pytz.timezone(tz) is result.tz

    def test_constructor_timestamp_near_dst(self):
        # GH 20854
        ts = [
            Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
            Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
        ]
        result = DatetimeIndex(ts)
        expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
        tm.assert_index_equal(result, expected)

    # TODO(GH-24559): Remove the xfail for the tz-aware case.
    @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
    @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
    @pytest.mark.parametrize(
        "tz, dtype",
        [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
    )
    def test_constructor_with_int_tz(self, klass, box, tz, dtype):
        # GH 20997, 20964
        ts = Timestamp("2018-01-01", tz=tz)
        result = klass(box([ts.value]), dtype=dtype)
        expected = klass([ts])
        assert result == expected

    # This is the desired future behavior
    # Note: this xfail is not strict because the test passes with
    #  None or any of the UTC variants for tz_naive_fixture
    @pytest.mark.xfail(reason="Future behavior", strict=False)
    @pytest.mark.filterwarnings("ignore:\\n    Passing:FutureWarning")
    def test_construction_int_rountrip(self, tz_naive_fixture):
        # GH 12619
        # TODO(GH-24559): Remove xfail
        tz = tz_naive_fixture
        result = 1293858000000000000
        expected = DatetimeIndex([result], tz=tz).asi8[0]
        assert result == expected

    def test_construction_from_replaced_timestamps_with_dst(self):
        # GH 18785
        index = pd.date_range(
            pd.Timestamp(2000, 1, 1),
            pd.Timestamp(2005, 1, 1),
            freq="MS",
            tz="Australia/Melbourne",
        )
        test = pd.DataFrame({"data": range(len(index))}, index=index)
        test = test.resample("Y").mean()
        result = pd.DatetimeIndex([x.replace(month=6, day=1) for x in test.index])
        expected = pd.DatetimeIndex(
            [
                "2000-06-01 00:00:00",
                "2001-06-01 00:00:00",
                "2002-06-01 00:00:00",
                "2003-06-01 00:00:00",
                "2004-06-01 00:00:00",
                "2005-06-01 00:00:00",
            ],
            tz="Australia/Melbourne",
        )
        tm.assert_index_equal(result, expected)

    def test_construction_with_tz_and_tz_aware_dti(self):
        # GH 23579
        dti = date_range("2016-01-01", periods=3, tz="US/Central")
        with pytest.raises(TypeError):
            DatetimeIndex(dti, tz="Asia/Tokyo")

    def test_construction_with_nat_and_tzlocal(self):
        tz = dateutil.tz.tzlocal()
        result = DatetimeIndex(["2018", "NaT"], tz=tz)
        expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
        tm.assert_index_equal(result, expected)

    def test_constructor_no_precision_raises(self):
        # GH-24753, GH-24739

        msg = "with no precision is not allowed"
        with pytest.raises(ValueError, match=msg):
            pd.DatetimeIndex(["2000"], dtype="datetime64")

        with pytest.raises(ValueError, match=msg):
            pd.Index(["2000"], dtype="datetime64")

    def test_constructor_wrong_precision_raises(self):
        with pytest.raises(ValueError):
            pd.DatetimeIndex(["2000"], dtype="datetime64[us]")

    def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
        # GH 27011
        result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
        expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
        tm.assert_index_equal(result, expected)
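
For reference, a minimal sketch of the dtype-based construction that several of the tests above exercise (assumes only that pandas is importable):

import pandas as pd

idx = pd.DatetimeIndex(['2013-01-01', '2013-01-02'], dtype='datetime64[ns, US/Eastern]')
print(idx.tz)     # US/Eastern
print(idx[0])     # 2013-01-01 00:00:00-05:00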
Example #32
    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex
        result = DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # a tz mismatch among tz-aware Timestamps raises TypeError/ValueError

        with pytest.raises(ValueError):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                name="idx",
            )

        msg = "cannot be converted to datetime64"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )

        with pytest.raises(ValueError):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="US/Eastern",
                name="idx",
            )

        with pytest.raises(ValueError, match=msg):
            # passing tz should result in a DatetimeIndex, then the mismatch raises
            # TypeError
            Index(
                [
                    pd.NaT,
                    Timestamp("2011-01-01 10:00"),
                    pd.NaT,
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )
Example #33
 def test_td_sub_timedeltalike_object_dtype_array(self):
     # GH#21980
     arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")])
     exp = np.array([Timestamp("20121231 9:01"), Timestamp("20121229 9:02")])
     res = arr - Timedelta("1D")
     tm.assert_numpy_array_equal(res, exp)
Example #34
class TestDataFrameSetitemCopyViewSemantics:
    def test_setitem_always_copy(self, float_frame):
        assert "E" not in float_frame.columns
        s = float_frame["A"].copy()
        float_frame["E"] = s

        float_frame["E"][5:10] = np.nan
        assert notna(s[5:10]).all()

    @pytest.mark.parametrize("consolidate", [True, False])
    def test_setitem_partial_column_inplace(self, consolidate,
                                            using_array_manager):
        # This setting should be in-place, regardless of whether frame is
        #  single-block or multi-block
        # GH#304 this used to be incorrectly not-inplace, in which case
        #  we needed to ensure _item_cache was cleared.

        df = DataFrame(
            {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]},
            index=[0, 1, 2, 3],
        )
        df.insert(2, "z", np.nan)
        if not using_array_manager:
            if consolidate:
                df._consolidate_inplace()
                assert len(df._mgr.blocks) == 1
            else:
                assert len(df._mgr.blocks) == 2

        zvals = df["z"]._values

        df.loc[2:, "z"] = 42

        expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z")
        tm.assert_series_equal(df["z"], expected)

        # check setting occurred in-place
        tm.assert_numpy_array_equal(zvals, expected.values)
        assert np.shares_memory(zvals, df["z"]._values)

    def test_setitem_duplicate_columns_not_inplace(self):
        # GH#39510
        cols = ["A", "B"] * 2
        df = DataFrame(0.0, index=[0], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df["B"] = (2, 5)

        expected = DataFrame([[0.0, 2, 0.0, 5]], columns=cols)
        tm.assert_frame_equal(df_view, df_copy)
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "value", [1, np.array([[1], [1]], dtype="int64"), [[1], [1]]])
    def test_setitem_same_dtype_not_inplace(self, value, using_array_manager):
        # GH#39510
        cols = ["A", "B"]
        df = DataFrame(0, index=[0, 1], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df[["B"]] = value

        expected = DataFrame([[0, 1], [0, 1]], columns=cols)
        tm.assert_frame_equal(df, expected)
        tm.assert_frame_equal(df_view, df_copy)

    @pytest.mark.parametrize(
        "value", [1.0, np.array([[1.0], [1.0]]), [[1.0], [1.0]]])
    def test_setitem_listlike_key_scalar_value_not_inplace(self, value):
        # GH#39510
        cols = ["A", "B"]
        df = DataFrame(0, index=[0, 1], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df[["B"]] = value

        expected = DataFrame([[0, 1.0], [0, 1.0]], columns=cols)
        tm.assert_frame_equal(df_view, df_copy)
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "indexer",
        [
            "a",
            ["a"],
            pytest.param(
                [True, False],
                marks=pytest.mark.xfail(
                    reason="Boolean indexer incorrectly setting inplace",
                    strict=False,  # passing on some builds, no obvious pattern
                ),
            ),
        ],
    )
    @pytest.mark.parametrize(
        "value, set_value",
        [
            (1, 5),
            (1.0, 5.0),
            (Timestamp("2020-12-31"), Timestamp("2021-12-31")),
            ("a", "b"),
        ],
    )
    def test_setitem_not_operating_inplace(self, value, set_value, indexer):
        # GH#43406
        df = DataFrame({"a": value}, index=[0, 1])
        expected = df.copy()
        view = df[:]
        df[indexer] = set_value
        tm.assert_frame_equal(view, expected)
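
A short sketch of the copy-on-assignment behavior these tests assert (view semantics vary across pandas versions, so treat this as illustrative):

import numpy as np
import pandas as pd

df = pd.DataFrame({'A': [1.0, 2.0, 3.0]})
s = df['A'].copy()
df['E'] = s               # column assignment always copies the data
df.loc[0, 'E'] = np.nan   # so mutating the new column...
print(s.iloc[0])          # ...leaves the source Series untouched: 1.0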
Example #35
def time_f(): return lambda x: Timestamp(x)

def fxcm_timestamp_fn(df):
Example #36
 def test_td_add_timedeltalike_object_dtype_array(self, op):
     # GH#21980
     arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")])
     exp = np.array([Timestamp("20130102 9:01"), Timestamp("20121231 9:02")])
     res = op(arr, Timedelta("1D"))
     tm.assert_numpy_array_equal(res, exp)
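
A standalone illustration (sketch only) of Timedelta arithmetic broadcasting over an object-dtype array of Timestamps, as asserted above:

import numpy as np
import pandas as pd

arr = np.array([pd.Timestamp('2013-01-01 09:01'), pd.Timestamp('2012-12-30 09:02')])
print(arr + pd.Timedelta('1D'))   # each element shifted forward one day
print(arr - pd.Timedelta('1D'))   # and shifted back one day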
Example #37
 def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
     # GH 27011
     result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
     expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
     tm.assert_index_equal(result, expected)
Example #38

def test_bins_not_monotonic():
    msg = "bins must increase monotonically"
    data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]

    with pytest.raises(ValueError, match=msg):
        cut(data, [0.1, 1.5, 1, 10])
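
As a quick aside, a self-contained sketch of the monotonic-bins requirement exercised here (the exact error text may differ by pandas version):

import pandas as pd

data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
print(pd.cut(data, [0, 1.5, 5, 10]))    # increasing bin edges work
try:
    pd.cut(data, [0.1, 1.5, 1, 10])     # non-monotonic edges raise
except ValueError as err:
    print(err)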


@pytest.mark.parametrize(
    "x, bins, expected",
    [
        (
            date_range("2017-12-31", periods=3),
            [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max],
            IntervalIndex.from_tuples(
                [
                    (Timestamp.min, Timestamp("2018-01-01")),
                    (Timestamp("2018-01-01"), Timestamp.max),
                ]
            ),
        ),
        (
            [-1, 0, 1],
            np.array(
                [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64"
            ),
            IntervalIndex.from_tuples(
                [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)]
            ),
Example #39
    def test_datetimeindex_constructor_misc(self):
        arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
        msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(arr)

        arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
        idx1 = DatetimeIndex(arr)

        arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
        idx2 = DatetimeIndex(arr)

        arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
        idx3 = DatetimeIndex(arr)

        arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
        idx4 = DatetimeIndex(arr)

        arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
        idx5 = DatetimeIndex(arr)

        arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
        idx6 = DatetimeIndex(arr)

        idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
        idx8 = DatetimeIndex(
            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
        )
        tm.assert_index_equal(idx7, idx8)

        for other in [idx2, idx3, idx4, idx5, idx6]:
            assert (idx1.values == other.values).all()

        sdate = datetime(1999, 12, 25)
        edate = datetime(2000, 1, 1)
        idx = date_range(start=sdate, freq="1B", periods=20)
        assert len(idx) == 20
        assert idx[0] == sdate + 0 * offsets.BDay()
        assert idx.freq == "B"

        idx = date_range(end=edate, freq=("D", 5), periods=20)
        assert len(idx) == 20
        assert idx[-1] == edate
        assert idx.freq == "5D"

        idx1 = date_range(start=sdate, end=edate, freq="W-SUN")
        idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6))
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq

        idx1 = date_range(start=sdate, end=edate, freq="QS")
        idx2 = date_range(
            start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1)
        )
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq

        idx1 = date_range(start=sdate, end=edate, freq="BQ")
        idx2 = date_range(
            start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12)
        )
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq
Example #40
0
class TestCounting(object):
    def test_cumcount(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3])

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.cumcount())
        assert_series_equal(e, se.cumcount())

    def test_cumcount_dupe_index(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']],
                       columns=['A'],
                       index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']],
                       columns=['A'],
                       index=mi)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=mi)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_groupby_not_col(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']],
                       columns=['A'],
                       index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_ngroup(self):
        df = DataFrame({'A': list('aaaba')})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0])

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_distinct(self):
        df = DataFrame({'A': list('abcde')})
        g = df.groupby('A')
        sg = g.A

        expected = Series(range(5), dtype='int64')

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_one_group(self):
        df = DataFrame({'A': [0] * 5})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.ngroup())
        assert_series_equal(e, se.ngroup())

    def test_ngroup_series_matches_frame(self):
        df = DataFrame({'A': list('aaaba')})
        s = Series(list('aaaba'))

        assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup())

    def test_ngroup_dupe_index(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame({'A': list('aaaba')}, index=mi)
        g = df.groupby('A')
        sg = g.A
        expected = Series([0, 0, 0, 1, 0], index=mi)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_groupby_not_col(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_descending(self):
        df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A'])
        g = df.groupby(['A'])

        ascending = Series([0, 0, 1, 0, 1])
        descending = Series([1, 1, 0, 1, 0])

        assert_series_equal(descending, (g.ngroups - 1) - ascending)
        assert_series_equal(ascending, g.ngroup(ascending=True))
        assert_series_equal(descending, g.ngroup(ascending=False))

    def test_ngroup_matches_cumcount(self):
        # verify one manually-worked out case works
        df = DataFrame(
            [['a', 'x'], ['a', 'y'], ['b', 'x'], ['a', 'x'], ['b', 'y']],
            columns=['A', 'X'])
        g = df.groupby(['A', 'X'])
        g_ngroup = g.ngroup()
        g_cumcount = g.cumcount()
        expected_ngroup = Series([0, 1, 2, 0, 3])
        expected_cumcount = Series([0, 0, 0, 1, 0])

        assert_series_equal(g_ngroup, expected_ngroup)
        assert_series_equal(g_cumcount, expected_cumcount)

    def test_ngroup_cumcount_pair(self):
        # brute force comparison for all small series
        for p in cart_product(range(3), repeat=4):
            df = DataFrame({'a': p})
            g = df.groupby(['a'])

            order = sorted(set(p))
            ngroupd = [order.index(val) for val in p]
            cumcounted = [p[:i].count(val) for i, val in enumerate(p)]

            assert_series_equal(g.ngroup(), Series(ngroupd))
            assert_series_equal(g.cumcount(), Series(cumcounted))

    def test_ngroup_respects_groupby_order(self):
        np.random.seed(0)
        df = DataFrame({'a': np.random.choice(list('abcdef'), 100)})
        for sort_flag in (False, True):
            g = df.groupby(['a'], sort=sort_flag)
            df['group_id'] = -1
            df['group_index'] = -1

            for i, (_, group) in enumerate(g):
                df.loc[group.index, 'group_id'] = i
                for j, ind in enumerate(group.index):
                    df.loc[ind, 'group_index'] = j

            assert_series_equal(Series(df['group_id'].values), g.ngroup())
            assert_series_equal(Series(df['group_index'].values), g.cumcount())

    @pytest.mark.parametrize(
        'datetimelike',
        [[Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)],
         [Timestamp('2016-05-%02d 20:09:25' % i)
          for i in range(1, 4)], [Timedelta(x, unit="h") for x in range(1, 4)],
         [Period(freq="2W", year=2017, month=x) for x in range(1, 4)]])
    def test_count_with_datetimelike(self, datetimelike):
        # test for #13393, where DataFrameGroupBy.count() fails
        # when counting a datetimelike column.

        df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike})
        res = df.groupby('x').count()
        expected = DataFrame({'y': [2, 1]}, index=['a', 'b'])
        expected.index.name = "x"
        assert_frame_equal(expected, res)

    def test_count_with_only_nans_in_first_group(self):
        # GH21956
        df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]})
        result = df.groupby(['A', 'B']).C.count()
        mi = MultiIndex(levels=[[], ['a', 'b']],
                        codes=[[], []],
                        names=['A', 'B'])
        expected = Series([], index=mi, dtype=np.int64, name='C')
        assert_series_equal(result, expected, check_index_type=False)
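The two comparison tests above (test_ngroup_matches_cumcount and test_ngroup_cumcount_pair) encode the informal definitions of both methods: ngroup() labels each row with the number of its group key in the groupby's iteration order, while cumcount() gives the row's running position within its own group. A minimal worked sketch of those definitions, assuming the same imports as the surrounding tests:

df = DataFrame({'a': ['x', 'y', 'x', 'x', 'y']})
g = df.groupby('a')

# group label per row: 'x' -> 0, 'y' -> 1 (sorted key order with sort=True)
assert_series_equal(g.ngroup(), Series([0, 1, 0, 0, 1]))

# running position of each row within its own group
assert_series_equal(g.cumcount(), Series([0, 0, 1, 2, 1]))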
Example #41
0
def create_data():
    """ create the pickle/msgpack data """

    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5,
                                                     2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Example #42
0
    def test_divmod_invalid(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        with pytest.raises(TypeError):
            divmod(td, Timestamp("2018-01-22"))
Example #43
0
    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # see gh-11488
        result = Index(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
            name="idx",
        )
        exp = DatetimeIndex(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # Same tz results in DatetimeIndex
        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00"),
            ],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                pd.NaT,
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # different tz results in Index(dtype=object)
        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # all NaT
        result = Index([pd.NaT, pd.NaT], name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")

        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz
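The branches above reduce to one inference rule: when every non-NaT element carries the same timezone (or none does), Index defers to DatetimeIndex; once two timezones are mixed, the constructor keeps an object-dtype Index. A compact restatement of that rule, assuming the same imports as the test:

same_tz = Index([Timestamp("2011-01-01", tz="UTC"), pd.NaT])
assert isinstance(same_tz, DatetimeIndex)
assert str(same_tz.tz) == "UTC"

mixed_tz = Index([Timestamp("2011-01-01", tz="UTC"),
                  Timestamp("2011-01-01", tz="US/Eastern")])
assert mixed_tz.dtype == object
assert not isinstance(mixed_tz, DatetimeIndex)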
Example #44
0
    def test_convert(self):
        # GH#10265
        dt = datetime(2001, 1, 1, 0, 0)
        td = dt - datetime(2000, 1, 1, 0, 0)

        # Test coercion with mixed types
        ser = Series(["a", "3.1415", dt, td])

        results = ser._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        tm.assert_series_equal(results, expected)

        # Test standard conversion returns original
        results = ser._convert(datetime=True)
        tm.assert_series_equal(results, ser)
        results = ser._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        tm.assert_series_equal(results, expected)
        results = ser._convert(timedelta=True)
        tm.assert_series_equal(results, ser)

        # test pass-through and non-conversion when other types selected
        ser = Series(["1.0", "2.0", "3.0"])
        results = ser._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([1.0, 2.0, 3.0])
        tm.assert_series_equal(results, expected)
        results = ser._convert(True, False, True)
        tm.assert_series_equal(results, ser)

        ser = Series(
            [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O"
        )
        results = ser._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
        tm.assert_series_equal(results, expected)
        results = ser._convert(datetime=False, numeric=True, timedelta=True)
        tm.assert_series_equal(results, ser)

        td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
        ser = Series([td, td], dtype="O")
        results = ser._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([td, td])
        tm.assert_series_equal(results, expected)
        results = ser._convert(True, True, False)
        tm.assert_series_equal(results, ser)

        ser = Series([1.0, 2, 3], index=["a", "b", "c"])
        result = ser._convert(numeric=True)
        tm.assert_series_equal(result, ser)

        # force numeric conversion
        res = ser.copy().astype("O")
        res["a"] = "1"
        result = res._convert(numeric=True)
        tm.assert_series_equal(result, ser)

        res = ser.copy().astype("O")
        res["a"] = "1."
        result = res._convert(numeric=True)
        tm.assert_series_equal(result, ser)

        res = ser.copy().astype("O")
        res["a"] = "garbled"
        result = res._convert(numeric=True)
        expected = ser.copy()
        expected["a"] = np.nan
        tm.assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g. floats and object)
        ser = Series([1, "na", 3, 4])
        result = ser._convert(datetime=True, numeric=True)
        expected = Series([1, np.nan, 3, 4])
        tm.assert_series_equal(result, expected)

        ser = Series([1, "", 3, 4])
        result = ser._convert(datetime=True, numeric=True)
        tm.assert_series_equal(result, expected)

        # dates
        ser = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
            ]
        )

        result = ser._convert(datetime=True)
        expected = Series(
            [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
            dtype="M8[ns]",
        )
        tm.assert_series_equal(result, expected)

        result = ser._convert(datetime=True)
        tm.assert_series_equal(result, expected)

        # preserve if non-object
        ser = Series([1], dtype="float32")
        result = ser._convert(datetime=True)
        tm.assert_series_equal(result, ser)
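test_convert goes through the private Series._convert; outside the test suite the numeric branch is usually expressed with the public to_numeric and errors="coerce", which coerces anything unparseable to NaN. A hedged public-API sketch of the same coercion, assuming the usual pandas imports (pd, tm, Series, np):

ser = Series(["1.0", "2.0", "garbled"])
coerced = pd.to_numeric(ser, errors="coerce")  # "garbled" becomes NaN
tm.assert_series_equal(coerced, Series([1.0, 2.0, np.nan]))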
Example #45
0
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get('typ')
    if typ is None:
        return obj
    elif typ == 'timestamp':
        return Timestamp(obj['value'], tz=obj['tz'], offset=obj['offset'])
    elif typ == 'period':
        return Period(ordinal=obj['ordinal'], freq=obj['freq'])
    elif typ == 'index':
        dtype = dtype_for(obj['dtype'])
        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
                         obj.get('compress'))
        return globals()[obj['klass']](data, dtype=dtype, name=obj['name'])
    elif typ == 'multi_index':
        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
                         obj.get('compress'))
        data = [tuple(x) for x in data]
        return globals()[obj['klass']].from_tuples(data, names=obj['names'])
    elif typ == 'period_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        d = dict(name=obj['name'], freq=obj['freq'])
        return globals()[obj['klass']](data, **d)
    elif typ == 'datetime_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        d = dict(name=obj['name'], freq=obj['freq'], verify_integrity=False)
        result = globals()[obj['klass']](data, **d)
        tz = obj['tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == 'series':
        dtype = dtype_for(obj['dtype'])
        index = obj['index']
        return globals()[obj['klass']](unconvert(obj['data'], dtype,
                                                 obj['compress']),
                                       index=index,
                                       name=obj['name'])
    elif typ == 'block_manager':
        axes = obj['axes']

        def create_block(b):
            values = unconvert(b['values'], dtype_for(b['dtype']),
                               b['compress']).reshape(b['shape'])
            return make_block(values=values,
                              klass=getattr(internals, b['klass']),
                              placement=axes[0].get_indexer(b['items']))

        blocks = [create_block(b) for b in obj['blocks']]
        return globals()[obj['klass']](BlockManager(blocks, axes))
    elif typ == 'datetime':
        return parse(obj['data'])
    elif typ == 'datetime64':
        return np.datetime64(parse(obj['data']))
    elif typ == 'date':
        return parse(obj['data']).date()
    elif typ == 'timedelta':
        return timedelta(*obj['data'])
    elif typ == 'timedelta64':
        return np.timedelta64(int(obj['data']))
    #elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    #elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    #elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == 'block_index':
        return globals()[obj['klass']](obj['length'], obj['blocs'],
                                       obj['blengths'])
    elif typ == 'int_index':
        return globals()[obj['klass']](obj['length'], obj['indices'])
    elif typ == 'ndarray':
        return unconvert(obj['data'], np.typeDict[obj['dtype']],
                         obj.get('compress')).reshape(obj['shape'])
    elif typ == 'np_scalar':
        if obj.get('sub_typ') == 'np_complex':
            return c2f(obj['real'], obj['imag'], obj['dtype'])
        else:
            dtype = dtype_for(obj['dtype'])
            try:
                return dtype(obj['data'])
            except (ValueError, TypeError):
                return dtype.type(obj['data'])
    elif typ == 'np_complex':
        return complex(obj['real'] + '+' + obj['imag'] + 'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
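decode() is a plain dispatcher on the 'typ' key, so scalar payloads can be fed to it directly. A minimal sketch of the timestamp branch, assuming the key layout used above (an integer nanosecond 'value' plus 'tz' and 'offset') and the legacy Timestamp signature this module targets:

payload = {
    'typ': 'timestamp',
    'value': Timestamp('2013-01-01', tz='UTC').value,  # nanoseconds since epoch
    'tz': 'UTC',
    'offset': None,
}
assert decode(payload) == Timestamp('2013-01-01', tz='UTC')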
Example #46
0
class TestArithmetic(object):
    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation

        arr = pd.Series(['a', 'b', 'c'])
        expected = pd.Series([op(x, other) for x in arr])

        arr = tm.box_expected(arr, box)
        expected = tm.box_expected(expected, box)

        result = op(arr, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize('box', [
        pytest.param(pd.Index,
                     marks=pytest.mark.xfail(reason="Does not mask nulls",
                                             strict=True,
                                             raises=TypeError)), pd.Series,
        pd.DataFrame
    ],
                             ids=lambda x: x.__name__)
    def test_objarr_add_str(self, box):
        ser = pd.Series(['x', np.nan, 'x'])
        expected = pd.Series(['xa', np.nan, 'xa'])

        ser = tm.box_expected(ser, box)
        expected = tm.box_expected(expected, box)

        result = ser + 'a'
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize('box', [
        pytest.param(pd.Index,
                     marks=pytest.mark.xfail(reason="Does not mask nulls",
                                             strict=True,
                                             raises=TypeError)), pd.Series,
        pd.DataFrame
    ],
                             ids=lambda x: x.__name__)
    def test_objarr_radd_str(self, box):
        ser = pd.Series(['x', np.nan, 'x'])
        expected = pd.Series(['ax', np.nan, 'ax'])

        ser = tm.box_expected(ser, box)
        expected = tm.box_expected(expected, box)

        result = 'a' + ser
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        'data', [[1, 2, 3], [1.1, 2.2, 3.3],
                 [Timestamp('2011-01-01'),
                  Timestamp('2011-01-02'), pd.NaT], ['x', 'y', 1]])
    @pytest.mark.parametrize('dtype', [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box):
        ser = Series(data, dtype=dtype)

        ser = tm.box_expected(ser, box)
        with pytest.raises(TypeError):
            'foo_' + ser

    @pytest.mark.parametrize('op',
                             [operator.add, ops.radd, operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box):
        # invalid ops
        if box is pd.DataFrame and op is ops.radd:
            pytest.xfail(reason="DataFrame op incorrectly casts the np.array "
                         "case to M8[ns]")

        obj_ser = tm.makeObjectSeries()
        obj_ser.name = 'objects'

        obj_ser = tm.box_expected(obj_ser, box)
        with pytest.raises(Exception):
            op(obj_ser, 1)
        with pytest.raises(Exception):
            op(obj_ser, np.array(1, dtype=np.int64))

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        ser = Series(['foo', 'bar', 'baz', np.nan])
        result = 'prefix_' + ser
        expected = pd.Series(
            ['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan])
        tm.assert_series_equal(result, expected)

        result = ser + '_suffix'
        expected = pd.Series(
            ['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan])
        tm.assert_series_equal(result, expected)

    # TODO: parametrize over box
    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        ser = pd.Series([
            pd.Timedelta('1 days'),
            pd.Timedelta('2 days'),
            pd.Timedelta('3 days')
        ],
                        dtype=dtype)
        expected = pd.Series([
            pd.Timedelta('4 days'),
            pd.Timedelta('5 days'),
            pd.Timedelta('6 days')
        ])

        result = pd.Timedelta('3 days') + ser
        tm.assert_series_equal(result, expected)

        result = ser + pd.Timedelta('3 days')
        tm.assert_series_equal(result, expected)
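test_add_extension_scalar above hinges on 'category' and 'Int64' being treated as ordinary strings in this context rather than dtype markers, so addition is plain element-wise concatenation. Pulled out of the parametrization, the expectation looks roughly like this, assuming the usual imports:

ser = pd.Series(['a', 'b', 'c'])
expected = pd.Series(['acategory', 'bcategory', 'ccategory'])
tm.assert_series_equal(ser + 'category', expected)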
Example #47
0
 def test_construction_with_nat_and_tzlocal(self):
     tz = dateutil.tz.tzlocal()
     result = DatetimeIndex(["2018", "NaT"], tz=tz)
     expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
     tm.assert_index_equal(result, expected)
Example #48
0
 def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self):
     # GH#21980
     now = Timestamp.now()
     arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")])
     with pytest.raises(TypeError):
         Timedelta("1D") - arr
Example #49
0
 def test_maybe_cast_slice_duplicate_monotonic(self):
     # https://github.com/pandas-dev/pandas/issues/16515
     idx = DatetimeIndex(["2017", "2017"])
     result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc")
     expected = Timestamp("2017-01-01")
     assert result == expected
Example #50
0
    def test_interleave(self):

        # interleave with object
        result = self.tzframe.assign(D='foo').values
        expected = np.array(
            [[
                Timestamp('2013-01-01 00:00:00'),
                Timestamp('2013-01-02 00:00:00'),
                Timestamp('2013-01-03 00:00:00')
            ],
             [
                 Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'),
                 pd.NaT,
                 Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')
             ],
             [
                 Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT,
                 Timestamp('2013-01-03 00:00:00+0100', tz='CET')
             ], ['foo', 'foo', 'foo']],
            dtype=object).T
        tm.assert_numpy_array_equal(result, expected)

        # interleave with only datetime64[ns]
        result = self.tzframe.values
        expected = np.array(
            [[
                Timestamp('2013-01-01 00:00:00'),
                Timestamp('2013-01-02 00:00:00'),
                Timestamp('2013-01-03 00:00:00')
            ],
             [
                 Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'),
                 pd.NaT,
                 Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')
             ],
             [
                 Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT,
                 Timestamp('2013-01-03 00:00:00+0100', tz='CET')
             ]],
            dtype=object).T
        tm.assert_numpy_array_equal(result, expected)
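Both interleave cases come down to how DataFrame.values chooses a common dtype: as soon as the frame mixes naive and tz-aware datetime columns (or adds an object column), the only common denominator is object, so every cell is boxed as a Timestamp. A minimal sketch of that fallback with a hypothetical two-column frame:

df = DataFrame({
    'naive': [Timestamp('2013-01-01')],
    'aware': [Timestamp('2013-01-01', tz='US/Eastern')],
})
assert df.values.dtype == object
assert isinstance(df.values[0, 0], Timestamp)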
Example #51
0
def test_generate_training_set(mocker):
    mocker.patch("dagster_examples.bay_bikes.solids.read_sql_table",
                 side_effect=mock_read_sql)

    # Execute Pipeline
    test_pipeline_result = execute_pipeline(
        pipeline=generate_test_training_set_pipeline,
        mode="testing",
        run_config=compose_training_data_env_dict(),
    )
    assert test_pipeline_result.success

    # Check solids
    EXPECTED_TRAFFIC_RECORDS = [
        {
            "interval_date": date(2019, 7, 31),
            "peak_traffic_load": 1,
            "time": Timestamp("2019-07-31 00:00:00"),
        },
        {
            "interval_date": date(2019, 8, 31),
            "peak_traffic_load": 1,
            "time": Timestamp("2019-08-31 00:00:00"),
        },
    ]
    traffic_dataset = test_pipeline_result.output_for_solid(
        "transform_into_traffic_dataset",
        output_name="traffic_dataframe").to_dict("records")
    assert all(record in EXPECTED_TRAFFIC_RECORDS
               for record in traffic_dataset)

    EXPECTED_WEATHER_RECORDS = [
        {
            "time": Timestamp("2019-08-31 00:00:00"),
            "summary": "Clear throughout the day.",
            "icon": "clear-day",
            "sunriseTime": 1546269960,
            "sunsetTime": 1546304520,
            "precipIntensity": 0.0007,
            "precipIntensityMax": 0.0019,
            "precipProbability": 0.05,
            "precipType": "rain",
            "temperatureHigh": 56.71,
            "temperatureHighTime": 1546294020,
            "temperatureLow": 44.75,
            "temperatureLowTime": 1546358040,
            "dewPoint": 28.34,
            "humidity": 0.43,
            "pressure": 1017.7,
            "windSpeed": 12.46,
            "windGust": 26.85,
            "windGustTime": 1546289220,
            "windBearing": 0,
            "cloudCover": 0.11,
            "uvIndex": 2,
            "uvIndexTime": 1546287180,
            "visibility": 10,
            "ozone": 314.4,
        },
        {
            "time": Timestamp("2019-07-31 00:00:00"),
            "summary": "Clear throughout the day.",
            "icon": "clear-day",
            "sunriseTime": 1546356420,
            "sunsetTime": 1546390920,
            "precipIntensity": 0.0005,
            "precipIntensityMax": 0.0016,
            "precipProbability": 0.02,
            "precipType": "sunny",
            "temperatureHigh": 55.91,
            "temperatureHighTime": 1546382040,
            "temperatureLow": 41.18,
            "temperatureLowTime": 1546437660,
            "dewPoint": 20.95,
            "humidity": 0.33,
            "pressure": 1023.3,
            "windSpeed": 6.77,
            "windGust": 22.08,
            "windGustTime": 1546343340,
            "windBearing": 22,
            "cloudCover": 0.1,
            "uvIndex": 2,
            "uvIndexTime": 1546373580,
            "visibility": 10,
            "ozone": 305.3,
        },
    ]
    weather_dataset = test_pipeline_result.output_for_solid(
        "produce_weather_dataset",
        output_name="weather_dataframe").to_dict("records")
    assert all(record in EXPECTED_WEATHER_RECORDS
               for record in weather_dataset)

    # Ensure we are generating the expected training set
    training_set, labels = test_pipeline_result.output_for_solid(
        "produce_training_set")
    assert len(labels) == 1 and labels[0] == 1
    assert array_equal(
        training_set,
        [[
            [
                1546356420.0,
                1546390920.0,
                0.0005,
                0.0016,
                0.02,
                55.91,
                1546382040.0,
                41.18,
                1546437660.0,
                20.95,
                0.33,
                1023.3,
                6.77,
                22.08,
                1546343340.0,
                22.0,
                0.1,
                2.0,
                1546373580.0,
                10.0,
                305.3,
            ],
            [
                1546269960.0,
                1546304520.0,
                0.0007,
                0.0019,
                0.05,
                56.71,
                1546294020.0,
                44.75,
                1546358040.0,
                28.34,
                0.43,
                1017.7,
                12.46,
                26.85,
                1546289220.0,
                0.0,
                0.11,
                2.0,
                1546287180.0,
                10.0,
                314.4,
            ],
        ]],
    )
    materialization_events = [
        event for event in test_pipeline_result.step_event_list
        if event.solid_name == "upload_training_set_to_gcs"
        and event.event_type_value == "STEP_MATERIALIZATION"
    ]
    assert len(materialization_events) == 1
    materialization = materialization_events[
        0].event_specific_data.materialization
    assert materialization.asset_key.path[0:5] == [
        "gs",
        "dagster",
        "scratch",
        "ccdfe1e",
        "training_data",
    ]
    materialization_event_metadata = materialization.metadata_entries
    assert len(materialization_event_metadata) == 1
    assert materialization_event_metadata[
        0].label == "google cloud storage URI"
    assert materialization_event_metadata[0].entry_data.text.startswith(
        "gs://dagster-scratch-ccdfe1e/training_data")

    # Clean up
    shutil.rmtree(os.path.join(tempfile.gettempdir(), "testing-storage"),
                  ignore_errors=True)
Example #52
0
    MockDailyBarReader,
)
from zipline.testing.fixtures import (
    WithAdjustmentReader,
    ZiplineTestCase,
)

# Test calendar ranges over the month of June 2015
#      June 2015
# Mo Tu We Th Fr Sa Su
#  1  2  3  4  5  6  7
#  8  9 10 11 12 13 14
# 15 16 17 18 19 20 21
# 22 23 24 25 26 27 28
# 29 30
TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC')
TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC')

TEST_QUERY_START = Timestamp('2015-06-10', tz='UTC')
TEST_QUERY_STOP = Timestamp('2015-06-19', tz='UTC')

# One asset for each of the cases enumerated in load_raw_arrays_from_bcolz.
EQUITY_INFO = DataFrame(
    [
        # 1) The equity's trades start and end before query.
        {
            'start_date': '2015-06-01',
            'end_date': '2015-06-05'
        },
        # 2) The equity's trades start and end after query.
        {
Example #53
0
class TestDataFrameSetItem:
    def test_setitem_str_subclass(self):
        # GH#37366
        class mystring(str):
            pass

        data = ["2020-10-22 01:21:00+00:00"]
        index = DatetimeIndex(data)
        df = DataFrame({"a": [1]}, index=index)
        df["b"] = 2
        df[mystring("c")] = 3
        expected = DataFrame({
            "a": [1],
            "b": [2],
            mystring("c"): [3]
        },
                             index=index)
        tm.assert_equal(df, expected)

    @pytest.mark.parametrize(
        "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"])
    def test_setitem_dtype(self, dtype, float_frame):
        arr = np.random.randn(len(float_frame))

        float_frame[dtype] = np.array(arr, dtype=dtype)
        assert float_frame[dtype].dtype.name == dtype

    def test_setitem_list_not_dataframe(self, float_frame):
        data = np.random.randn(len(float_frame), 2)
        float_frame[["A", "B"]] = data
        tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

    def test_setitem_error_msmgs(self):

        # GH 7432
        df = DataFrame(
            {
                "bar": [1, 2, 3],
                "baz": ["d", "e", "f"]
            },
            index=Index(["a", "b", "c"], name="foo"),
        )
        ser = Series(
            ["g", "h", "i", "j"],
            index=Index(["a", "b", "c", "a"], name="foo"),
            name="fiz",
        )
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match="non-unique"):
                df["newcol"] = ser

        # GH 4107, more descriptive error message
        df = DataFrame(np.random.randint(0, 2, (4, 4)),
                       columns=["a", "b", "c", "d"])

        msg = "incompatible index of inserted column with frame index"
        with pytest.raises(TypeError, match=msg):
            df["gr"] = df.groupby(["b", "c"]).count()

    def test_setitem_benchmark(self):
        # from the vb_suite/frame_methods/frame_insert_columns
        N = 10
        K = 5
        df = DataFrame(index=range(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K),
                             index=range(N))
        tm.assert_frame_equal(df, expected)

    def test_setitem_different_dtype(self):
        df = DataFrame(np.random.randn(5, 3),
                       index=np.arange(5),
                       columns=["c", "b", "a"])
        df.insert(0, "foo", df["a"])
        df.insert(2, "bar", df["c"])

        # diff dtype

        # new item
        df["x"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 5 + [np.dtype("float32")],
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        # replacing current (in different block)
        df["a"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2,
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        df["y"] = df["a"].astype("int32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 +
            [np.dtype("int32")],
            index=["foo", "c", "bar", "b", "a", "x", "y"],
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_empty_columns(self):
        # GH 13522
        df = DataFrame(index=["A", "B", "C"])
        df["X"] = df.index
        df["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        tm.assert_frame_equal(df, exp)

    def test_setitem_dt64_index_empty_columns(self):
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
        df = DataFrame(index=np.arange(len(rng)))

        df["A"] = rng
        assert df["A"].dtype == np.dtype("M8[ns]")

    def test_setitem_timestamp_empty_columns(self):
        # GH#19843
        df = DataFrame(index=range(3))
        df["now"] = Timestamp("20130101", tz="UTC")

        expected = DataFrame([[Timestamp("20130101", tz="UTC")]] * 3,
                             index=[0, 1, 2],
                             columns=["now"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_wrong_length_categorical_dtype_raises(self):
        # GH#29523
        cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
        df = DataFrame(range(10), columns=["bar"])

        msg = (rf"Length of values \({len(cat)}\) "
               rf"does not match length of index \({len(df)}\)")
        with pytest.raises(ValueError, match=msg):
            df["foo"] = cat

    def test_setitem_with_sparse_value(self):
        # GH#8131
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_array = SparseArray([0, 0, 1])
        df["new_column"] = sp_array

        expected = Series(sp_array, name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_with_unaligned_sparse_value(self):
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0])

        df["new_column"] = sp_series
        expected = Series(SparseArray([1, 0, 0]), name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_period_preserves_dtype(self):
        # GH: 26861
        data = [Period("2003-12", "D")]
        result = DataFrame([])
        result["a"] = data

        expected = DataFrame({"a": data})

        tm.assert_frame_equal(result, expected)

    def test_setitem_dict_preserves_dtypes(self):
        # https://github.com/pandas-dev/pandas/issues/34573
        expected = DataFrame({
            "a": Series([0, 1, 2], dtype="int64"),
            "b": Series([1, 2, 3], dtype=float),
            "c": Series([1, 2, 3], dtype=float),
            "d": Series([1, 2, 3], dtype="uint32"),
        })
        df = DataFrame({
            "a": Series([], dtype="int64"),
            "b": Series([], dtype=float),
            "c": Series([], dtype=float),
            "d": Series([], dtype="uint32"),
        })
        for idx, b in enumerate([1, 2, 3]):
            df.loc[df.shape[0]] = {
                "a": int(idx),
                "b": float(b),
                "c": float(b),
                "d": np.uint32(b),
            }
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "obj,dtype",
        [
            (Period("2020-01"), PeriodDtype("M")),
            (
                Interval(left=0, right=5, inclusive="right"),
                IntervalDtype("int64", "right"),
            ),
            (
                Timestamp("2011-01-01", tz="US/Eastern"),
                DatetimeTZDtype(tz="US/Eastern"),
            ),
        ],
    )
    def test_setitem_extension_types(self, obj, dtype):
        # GH: 34832
        expected = DataFrame({
            "idx": [1, 2, 3],
            "obj": Series([obj] * 3, dtype=dtype)
        })

        df = DataFrame({"idx": [1, 2, 3]})
        df["obj"] = obj

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "ea_name",
        [
            dtype.name for dtype in ea_registry.dtypes
            # property would require instantiation
            if not isinstance(dtype.name, property)
        ]
        # mypy doesn't allow adding lists of different types
        # https://github.com/python/mypy/issues/5492
        + ["datetime64[ns, UTC]", "period[D]"],  # type: ignore[list-item]
    )
    def test_setitem_with_ea_name(self, ea_name):
        # GH 38386
        result = DataFrame([0])
        result[ea_name] = [1]
        expected = DataFrame({0: [0], ea_name: [1]})
        tm.assert_frame_equal(result, expected)

    def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
        # GH#7492
        data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
        result = Series(data_ns).to_frame()
        result["new"] = data_ns
        expected = DataFrame({
            0: [1, None],
            "new": [1, None]
        },
                             dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

        # OutOfBoundsDatetime error shouldn't occur
        data_s = np.array([1, "nat"], dtype="datetime64[s]")
        result["new"] = data_s
        expected = DataFrame({
            0: [1, None],
            "new": [1e9, None]
        },
                             dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into a not-yet-existing column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df[unit] = vals

        assert df[unit].dtype == np.dtype("M8[ns]")
        assert (df[unit].values == ex_vals).all()

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into an already-existing dt64 column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]")

        # We overwrite existing dt64 column with new, non-nano dt64 vals
        df["dates"] = vals
        assert (df["dates"].values == ex_vals).all()
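
    # Hedged aside, not part of the original suite: both unit-parametrized
    # tests above rely on non-nanosecond datetime64 values being cast to
    # datetime64[ns] when assigned as a DataFrame column. This hypothetical
    # helper restates that assumption in its simplest form.
    def _sketch_non_nano_setitem_cast(self):
        vals = np.arange(3, dtype=np.int64).view("M8[s]")  # seconds resolution
        df = DataFrame({"ints": np.arange(3)})
        df["stamps"] = vals  # stored internally as datetime64[ns]
        assert df["stamps"].dtype == np.dtype("M8[ns]")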

    def test_setitem_dt64tz(self, timezone_frame):

        df = timezone_frame
        idx = df["B"].rename("foo")

        # setitem
        df["C"] = idx
        tm.assert_series_equal(df["C"], Series(idx, name="C"))

        df["D"] = "foo"
        df["D"] = idx
        tm.assert_series_equal(df["D"], Series(idx, name="D"))
        del df["D"]

        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        v1 = df._mgr.arrays[1]
        v2 = df._mgr.arrays[2]
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._data.base
        v2base = v2._data.base
        assert v1base is None or (id(v1base) != id(v2base))

        # with nan
        df2 = df.copy()
        df2.iloc[1, 1] = NaT
        df2.iloc[1, 2] = NaT
        result = df2["B"]
        tm.assert_series_equal(notna(result),
                               Series([True, False, True], name="B"))
        tm.assert_series_equal(df2.dtypes, df.dtypes)

    def test_setitem_periodindex(self):
        rng = period_range("1/1/2000", periods=5, name="index")
        df = DataFrame(np.random.randn(5, 3), index=rng)

        df["Index"] = rng
        rs = Index(df["Index"])
        tm.assert_index_equal(rs, rng, check_names=False)
        assert rs.name == "Index"
        assert rng.name == "index"

        rs = df.reset_index().set_index("index")
        assert isinstance(rs.index, PeriodIndex)
        tm.assert_index_equal(rs.index, rng)

    def test_setitem_complete_column_with_array(self):
        # GH#37954
        df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]})
        arr = np.array([[1, 1], [3, 1], [5, 1]])
        df[["c", "d"]] = arr
        expected = DataFrame({
            "a": ["one", "two", "three"],
            "b": [1, 2, 3],
            "c": [1, 3, 5],
            "d": [1, 1, 1],
        })
        expected["c"] = expected["c"].astype(arr.dtype)
        expected["d"] = expected["d"].astype(arr.dtype)
        assert expected["c"].dtype == arr.dtype
        assert expected["d"].dtype == arr.dtype
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_setitem_bool_with_numeric_index(self, dtype):
        # GH#36319
        cols = Index([1, 2, 3], dtype=dtype)
        df = DataFrame(np.random.randn(3, 3), columns=cols)

        df[False] = ["a", "b", "c"]

        expected_cols = Index([1, 2, 3, False], dtype=object)
        if dtype == "f8":
            expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object)

        tm.assert_index_equal(df.columns, expected_cols)

    @pytest.mark.parametrize("indexer", ["B", ["B"]])
    def test_setitem_frame_length_0_str_key(self, indexer):
        # GH#38831
        df = DataFrame(columns=["A", "B"])
        other = DataFrame({"B": [1, 2]})
        df[indexer] = other
        expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]})
        expected["A"] = expected["A"].astype("object")
        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns(self, using_array_manager):
        # GH#15695
        warn = FutureWarning if using_array_manager else None
        msg = "will attempt to set the values inplace"

        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        df.loc[0, "A"] = (0, 3)
        with tm.assert_produces_warning(warn, match=msg):
            df.loc[:, "B"] = (1, 4)
        df["C"] = (2, 5)
        expected = DataFrame(
            [
                [0, 1, 2, 3, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
            ],
            dtype="object",
        )

        if using_array_manager:
            # setitem replaces column so changes dtype

            expected.columns = cols
            expected["C"] = expected["C"].astype("int64")
            # TODO(ArrayManager) .loc still overwrites
            expected["B"] = expected["B"].astype("int64")

        else:
            # set these with unique columns to be extra-unambiguous
            expected[2] = expected[2].astype(np.int64)
            expected[5] = expected[5].astype(np.int64)
            expected.columns = cols

        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns_size_mismatch(self):
        # GH#39510
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        with pytest.raises(ValueError,
                           match="Columns must be same length as key"):
            df[["A"]] = (0, 3, 5)

        df2 = df.iloc[:, :3]  # unique columns
        with pytest.raises(ValueError,
                           match="Columns must be same length as key"):
            df2[["A"]] = (0, 3, 5)

    @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
    def test_setitem_df_wrong_column_number(self, cols):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=cols)
        rhs = DataFrame([[10, 11]], columns=["d", "e"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df["a"] = rhs

    def test_setitem_listlike_indexer_duplicate_columns(self):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        df[["a", "b"]] = rhs
        expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

        df[["c", "b"]] = rhs
        expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
        # GH#39403
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11]], columns=["a", "b"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df[["a", "b"]] = rhs

    def test_setitem_intervals(self):

        df = DataFrame({"A": range(10)})
        ser = cut(df["A"], 5)
        assert isinstance(ser.cat.categories, IntervalIndex)

        # B & D end up as Categoricals;
        # the remainder are set from plain arrays / objects holding the
        #  IntervalIndex's values (see the dtype assertions below)
        df["B"] = ser
        df["C"] = np.array(ser)
        df["D"] = ser.values
        df["E"] = np.array(ser.values)
        df["F"] = ser.astype(object)

        assert is_categorical_dtype(df["B"].dtype)
        assert is_interval_dtype(df["B"].cat.categories)
        assert is_categorical_dtype(df["D"].dtype)
        assert is_interval_dtype(df["D"].cat.categories)

        # These go through the Series constructor and so get inferred back
        #  to IntervalDtype
        assert is_interval_dtype(df["C"])
        assert is_interval_dtype(df["E"])

        # But the Series constructor doesn't do inference on Series objects,
        #  so setting df["F"] doesn't get cast back to IntervalDtype
        assert is_object_dtype(df["F"])

        # when converted to an Index of the underlying numpy objects,
        #  they all compare equal
        c = lambda x: Index(np.array(x))
        tm.assert_index_equal(c(df.B), c(df.B))
        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)

        # B & D are the same Series
        tm.assert_series_equal(df["B"], df["B"])
        tm.assert_series_equal(df["B"], df["D"], check_names=False)

        # C & E are the same Series
        tm.assert_series_equal(df["C"], df["C"])
        tm.assert_series_equal(df["C"], df["E"], check_names=False)
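
A minimal sketch (not part of the test file) of the dtype outcomes asserted above, assuming current pandas inference when setting interval data:

import numpy as np
import pandas as pd

df = pd.DataFrame({"A": range(10)})
ser = pd.cut(df["A"], 5)        # categorical Series with IntervalIndex categories
df["B"] = ser                   # stays categorical
df["C"] = np.array(ser)         # ndarray of Interval objects -> inferred IntervalDtype
df["F"] = ser.astype(object)    # object Series is not re-inferred -> stays object
print(df.dtypes)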

    def test_setitem_categorical(self):
        # GH#35369
        df = DataFrame({"h": Series(list("mn")).astype("category")})
        df.h = df.h.cat.reorder_categories(["n", "m"])
        expected = DataFrame(
            {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])})
        tm.assert_frame_equal(df, expected)

    def test_setitem_with_empty_listlike(self):
        # GH#17101
        index = Index([], name="idx")
        result = DataFrame(columns=["A"], index=index)
        result["A"] = []
        expected = DataFrame(columns=["A"], index=index)
        tm.assert_index_equal(result.index, expected.index)

    @pytest.mark.parametrize(
        "cols, values, expected",
        [
            (["C", "D", "D", "a"], [1, 2, 3, 4], 4),  # with duplicates
            (["D", "C", "D", "a"], [1, 2, 3, 4], 4),  # mixed order
            (["C", "B", "B", "a"], [1, 2, 3, 4], 4),  # other duplicate cols
            (["C", "B", "a"], [1, 2, 3], 3),  # no duplicates
            (["B", "C", "a"], [3, 2, 1], 1),  # alphabetical order
            (["C", "a", "B"], [3, 2, 1], 2),  # in the middle
        ],
    )
    def test_setitem_same_column(self, cols, values, expected):
        # GH#23239
        df = DataFrame([values], columns=cols)
        df["a"] = df["a"]
        result = df["a"].values[0]
        assert result == expected

    def test_setitem_multi_index(self):
        # GH#7655, test that assigning to a sub-frame of a frame
        # with multi-index columns aligns both rows and columns
        it = ["jim", "joe", "jolie"], ["first",
                                       "last"], ["left", "center", "right"]

        cols = MultiIndex.from_product(it)
        index = date_range("20141006", periods=20)
        vals = np.random.randint(1, 1000, (len(index), len(cols)))
        df = DataFrame(vals, columns=cols, index=index)

        i, j = df.index.values.copy(), it[-1][:]

        np.random.shuffle(i)
        df["jim"] = df["jolie"].loc[i, ::-1]
        tm.assert_frame_equal(df["jim"], df["jolie"])

        np.random.shuffle(j)
        df[("joe", "first")] = df[("jolie", "last")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")])

        np.random.shuffle(j)
        df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])
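
A small standalone illustration of the alignment this test relies on: assigning a Series into an existing column matches rows by index label, not by position.

import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3]}, index=["a", "b", "c"])
rhs = pd.Series([30, 10, 20], index=["c", "a", "b"])
df["x"] = rhs          # values are matched by label
print(df)              # a -> 10, b -> 20, c -> 30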

    @pytest.mark.parametrize(
        "columns,box,expected",
        [
            (
                ["A", "B", "C", "D"],
                7,
                DataFrame(
                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "D"],
                [7, 8],
                DataFrame(
                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "B", "C"],
                np.array([7, 8, 9], dtype=np.int64),
                DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]],
                          columns=["A", "B", "C"]),
            ),
            (
                ["B", "C", "D"],
                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                DataFrame(
                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "A", "D"],
                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                         dtype=np.int64),
                DataFrame(
                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "C"],
                DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
                DataFrame([[7, 2, 8], [9, 4, 10], [11, 6, 12]],
                          columns=["A", "B", "C"]),
            ),
        ],
    )
    def test_setitem_list_missing_columns(self, columns, box, expected):
        # GH#29334
        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
        df[columns] = box
        tm.assert_frame_equal(df, expected)
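
For reference, a compact sketch of the enlargement behavior parametrized above: list keys that do not exist yet are created, in the order given.

import pandas as pd

df = pd.DataFrame([[1, 2]], columns=["A", "B"])
df[["B", "C"]] = [[7, 8]]
print(df)              # columns A, B, C with the single row 1, 7, 8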

    def test_setitem_list_of_tuples(self, float_frame):
        tuples = list(zip(float_frame["A"], float_frame["B"]))
        float_frame["tuples"] = tuples

        result = float_frame["tuples"]
        expected = Series(tuples, index=float_frame.index, name="tuples")
        tm.assert_series_equal(result, expected)

    def test_setitem_iloc_generator(self):
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer] = 1
        expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_iloc_two_dimensional_generator(self):
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer, 1] = 1
        expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_dtypes_bytes_type_to_object(self):
        # GH 20734
        index = Series(name="id", dtype="S24")
        df = DataFrame(index=index)
        df["a"] = Series(name="a", index=index, dtype=np.uint32)
        df["b"] = Series(name="b", index=index, dtype="S64")
        df["c"] = Series(name="c", index=index, dtype="S64")
        df["d"] = Series(name="d", index=index, dtype=np.uint8)
        result = df.dtypes
        expected = Series([np.uint32, object, object, np.uint8],
                          index=list("abcd"))
        tm.assert_series_equal(result, expected)

    def test_boolean_mask_nullable_int64(self):
        # GH 28928
        result = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
        }).astype({
            "a": "int64",
            "b": "Int64"
        })
        mask = Series(False, index=result.index)
        result.loc[mask, "a"] = result["a"]
        result.loc[mask, "b"] = result["b"]
        expected = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
        }).astype({
            "a": "int64",
            "b": "Int64"
        })
        tm.assert_frame_equal(result, expected)

    def test_setitem_ea_dtype_rhs_series(self):
        # GH#47425
        df = DataFrame({"a": [1, 2]})
        df["a"] = Series([1, 2], dtype="Int64")
        expected = DataFrame({"a": [1, 2]}, dtype="Int64")
        tm.assert_frame_equal(df, expected)

    # TODO(ArrayManager) set column with 2d column array, see #44788
    @td.skip_array_manager_not_yet_implemented
    def test_setitem_npmatrix_2d(self):
        # GH#42376
        # for use-case df["x"] = sparse.random(10, 10).mean(axis=1)
        expected = DataFrame(
            {
                "np-array": np.ones(10),
                "np-matrix": np.ones(10)
            },
            index=np.arange(10))

        a = np.ones((10, 1))
        df = DataFrame(index=np.arange(10))
        df["np-array"] = a

        # Instantiation of `np.matrix` gives PendingDeprecationWarning
        with tm.assert_produces_warning(PendingDeprecationWarning):
            df["np-matrix"] = np.matrix(a)

        tm.assert_frame_equal(df, expected)
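
A sketch of the sparse use case mentioned in the comment above, assuming scipy is installed; the sparse row means come back as an np.matrix of shape (10, 1) and should land in the frame as an ordinary 1-D float column.

import numpy as np
import pandas as pd
from scipy import sparse

df = pd.DataFrame(index=np.arange(10))
df["x"] = sparse.random(10, 10, density=0.5).mean(axis=1)  # np.matrix, shape (10, 1)
print(df["x"].shape)   # (10,)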

    @pytest.mark.parametrize("vals", [{}, {"d": "a"}])
    def test_setitem_aligning_dict_with_index(self, vals):
        # GH#47216
        df = DataFrame({"a": [1, 2], "b": [3, 4], **vals})
        df.loc[:, "a"] = {1: 100, 0: 200}
        df.loc[:, "c"] = {0: 5, 1: 6}
        df.loc[:, "e"] = {1: 5}
        expected = DataFrame({
            "a": [200, 100],
            "b": [3, 4],
            **vals, "c": [5, 6],
            "e": [np.nan, 5]
        })
        tm.assert_frame_equal(df, expected)

    def test_setitem_rhs_dataframe(self):
        # GH#47578
        df = DataFrame({"a": [1, 2]})
        df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2])
        expected = DataFrame({"a": [np.nan, 10]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({"a": [1, 2]})
        df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2]))
        tm.assert_frame_equal(df, expected)
Example #54
    def test_000constructor_resolution(self):
        # GH#2252
        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
        idx = DatetimeIndex([t1])

        assert idx.nanosecond[0] == t1.nanosecond
Example #55
    def test_constructor_with_int_tz(self, klass, box, tz, dtype):
        # GH 20997, 20964
        ts = Timestamp("2018-01-01", tz=tz)
        result = klass(box([ts.value]), dtype=dtype)
        expected = klass([ts])
        assert result == expected
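
For context, one concrete instance of the parametrization above (assuming klass=DatetimeIndex, box=list, and the matching tz-aware dtype); the integer nanoseconds are treated as UTC epoch values, so the original Timestamp round-trips:

import pandas as pd

ts = pd.Timestamp("2018-01-01", tz="US/Eastern")
idx = pd.DatetimeIndex([ts.value], dtype="datetime64[ns, US/Eastern]")
assert idx[0] == ts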
Example #56
    tm.assert_frame_equal(result, expected)

    # prod
    result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
        "prod"
    )
    expected = pd.DataFrame(
        {"a": [1, 1, 1716, 1]},
        index=pd.CategoricalIndex(intervals, name="a", ordered=True),
    )
    if observed:
        expected = expected[expected.a != 1]

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("op", ["first", "last", "max", "min"])
@pytest.mark.parametrize(
    "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")]
)
def test_cython_with_timestamp_and_nat(op, data):
    # https://github.com/pandas-dev/pandas/issues/19526
    df = DataFrame({"a": [0, 1], "b": [data, NaT]})
    index = Index([0, 1], name="a")

    # We will group by a and test the cython aggregations
    expected = DataFrame({"b": [data, NaT]}, index=index)

    result = df.groupby("a").aggregate(op)
    tm.assert_frame_equal(expected, result)
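
A usage-style sketch (not from the test file) of what these cython paths back in everyday code: groupby reductions such as first/last/max/min on datetime-like columns keep NaT for all-missing groups and skip it otherwise.

import pandas as pd
from pandas import NaT, Timestamp

df = pd.DataFrame({"a": [0, 0, 1], "b": [Timestamp("2016-10-14"), NaT, NaT]})
print(df.groupby("a")["b"].first())   # group 0 -> 2016-10-14, group 1 -> NaT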
Example #57
    def test_compare_hour13(self):
        r = Timestamp("2000-08-12T13:00:00").to_julian_date()
        assert r == 2_451_769.0416666666666666
Example #58
    def test_constructor_with_non_normalized_pytz(self, tz):
        # GH 18595
        non_norm_tz = Timestamp("2010", tz=tz).tz
        result = DatetimeIndex(["2010"], tz=non_norm_tz)
        assert pytz.timezone(tz) is result.tz
Example #59
class TestTimedeltaMultiplicationDivision:
    """
    Tests for Timedelta methods:

        __mul__, __rmul__,
        __div__, __rdiv__,
        __truediv__, __rtruediv__,
        __floordiv__, __rfloordiv__,
        __mod__, __rmod__,
        __divmod__, __rdivmod__
    """

    # ---------------------------------------------------------------
    # Timedelta.__mul__, __rmul__

    @pytest.mark.parametrize(
        "td_nat", [NaT, np.timedelta64("NaT", "ns"), np.timedelta64("NaT")]
    )
    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_nat(self, op, td_nat):
        # GH#19819
        td = Timedelta(10, unit="d")
        with pytest.raises(TypeError):
            op(td, td_nat)

    @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")])
    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_nan(self, op, nan):
        # np.float64('NaN') has a 'dtype' attr, avoid treating as array
        td = Timedelta(10, unit="d")
        result = op(td, nan)
        assert result is NaT

    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_scalar(self, op):
        # GH#19738
        td = Timedelta(minutes=3)

        result = op(td, 2)
        assert result == Timedelta(minutes=6)

        result = op(td, 1.5)
        assert result == Timedelta(minutes=4, seconds=30)

        assert op(td, np.nan) is NaT

        assert op(-1, td).value == -1 * td.value
        assert op(-1.0, td).value == -1.0 * td.value

        with pytest.raises(TypeError):
            # timedelta * datetime is gibberish
            op(td, Timestamp(2016, 1, 2))

        with pytest.raises(TypeError):
            # invalid multiply with another timedelta
            op(td, td)

    # ---------------------------------------------------------------
    # Timedelta.__div__, __truediv__

    def test_td_div_timedeltalike_scalar(self):
        # GH#19738
        td = Timedelta(10, unit="d")

        result = td / offsets.Hour(1)
        assert result == 240

        assert td / td == 1
        assert td / np.timedelta64(60, "h") == 4

        assert np.isnan(td / NaT)

    def test_td_div_numeric_scalar(self):
        # GH#19738
        td = Timedelta(10, unit="d")

        result = td / 2
        assert isinstance(result, Timedelta)
        assert result == Timedelta(days=5)

        result = td / 5.0
        assert isinstance(result, Timedelta)
        assert result == Timedelta(days=2)

    @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")])
    def test_td_div_nan(self, nan):
        # np.float64('NaN') has a 'dtype' attr, avoid treating as array
        td = Timedelta(10, unit="d")
        result = td / nan
        assert result is NaT

        result = td // nan
        assert result is NaT

    # ---------------------------------------------------------------
    # Timedelta.__rdiv__

    def test_td_rdiv_timedeltalike_scalar(self):
        # GH#19738
        td = Timedelta(10, unit="d")
        result = offsets.Hour(1) / td
        assert result == 1 / 240.0

        assert np.timedelta64(60, "h") / td == 0.25

    # ---------------------------------------------------------------
    # Timedelta.__floordiv__

    def test_td_floordiv_timedeltalike_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        scalar = Timedelta(hours=3, minutes=3)

        assert td // scalar == 1
        assert -td // scalar.to_pytimedelta() == -2
        assert (2 * td) // scalar.to_timedelta64() == 2

    def test_td_floordiv_null_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)

        assert td // np.nan is NaT
        assert np.isnan(td // NaT)
        assert np.isnan(td // np.timedelta64("NaT"))

    def test_td_floordiv_offsets(self):
        # GH#19738
        td = Timedelta(hours=3, minutes=4)
        assert td // offsets.Hour(1) == 3
        assert td // offsets.Minute(2) == 92

    def test_td_floordiv_invalid_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)

        with pytest.raises(TypeError):
            td // np.datetime64("2016-01-01", dtype="datetime64[us]")

    def test_td_floordiv_numeric_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)

        expected = Timedelta(hours=1, minutes=32)
        assert td // 2 == expected
        assert td // 2.0 == expected
        assert td // np.float64(2.0) == expected
        assert td // np.int32(2.0) == expected
        assert td // np.uint8(2.0) == expected

    def test_td_floordiv_timedeltalike_array(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        scalar = Timedelta(hours=3, minutes=3)

        # Array-like others
        assert td // np.array(scalar.to_timedelta64()) == 1

        res = (3 * td) // np.array([scalar.to_timedelta64()])
        expected = np.array([3], dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        res = (10 * td) // np.array([scalar.to_timedelta64(), np.timedelta64("NaT")])
        expected = np.array([10, np.nan])
        tm.assert_numpy_array_equal(res, expected)

    def test_td_floordiv_numeric_series(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        ser = pd.Series([1], dtype=np.int64)
        res = td // ser
        assert res.dtype.kind == "m"

    # ---------------------------------------------------------------
    # Timedelta.__rfloordiv__

    def test_td_rfloordiv_timedeltalike_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        scalar = Timedelta(hours=3, minutes=4)

        # scalar others
        # x // Timedelta is defined only for timedelta-like x. int-like,
        # float-like, and date-like, in particular, should all either
        # a) raise TypeError directly or
        # b) return NotImplemented, following which the reversed
        #    operation will raise TypeError.
        assert td.__rfloordiv__(scalar) == 1
        assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2
        assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0

    def test_td_rfloordiv_null_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        assert np.isnan(td.__rfloordiv__(NaT))
        assert np.isnan(td.__rfloordiv__(np.timedelta64("NaT")))

    def test_td_rfloordiv_offsets(self):
        # GH#19738
        assert offsets.Hour(1) // Timedelta(minutes=25) == 2

    def test_td_rfloordiv_invalid_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        dt64 = np.datetime64("2016-01-01", dtype="datetime64[us]")
        with pytest.raises(TypeError):
            td.__rfloordiv__(dt64)

    def test_td_rfloordiv_numeric_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        assert td.__rfloordiv__(np.nan) is NotImplemented
        assert td.__rfloordiv__(3.5) is NotImplemented
        assert td.__rfloordiv__(2) is NotImplemented

        with pytest.raises(TypeError):
            td.__rfloordiv__(np.float64(2.0))
        with pytest.raises(TypeError):
            td.__rfloordiv__(np.uint8(9))
        with pytest.raises(TypeError, match="Invalid dtype"):
            # deprecated GH#19761, enforced GH#29797
            td.__rfloordiv__(np.int32(2.0))

    def test_td_rfloordiv_timedeltalike_array(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        scalar = Timedelta(hours=3, minutes=4)

        # Array-like others
        assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1

        res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()]))
        expected = np.array([3], dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        arr = np.array([(10 * scalar).to_timedelta64(), np.timedelta64("NaT")])
        res = td.__rfloordiv__(arr)
        expected = np.array([10, np.nan])
        tm.assert_numpy_array_equal(res, expected)

    def test_td_rfloordiv_numeric_series(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        ser = pd.Series([1], dtype=np.int64)
        res = td.__rfloordiv__(ser)
        assert res is NotImplemented

        with pytest.raises(TypeError, match="Invalid dtype"):
            # Deprecated GH#19761, enforced GH#29797
            ser // td

    # ----------------------------------------------------------------
    # Timedelta.__mod__, __rmod__

    def test_mod_timedeltalike(self):
        # GH#19365
        td = Timedelta(hours=37)

        # Timedelta-like others
        result = td % Timedelta(hours=6)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=1)

        result = td % timedelta(minutes=60)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(0)

        result = td % NaT
        assert result is NaT

    def test_mod_timedelta64_nat(self):
        # GH#19365
        td = Timedelta(hours=37)

        result = td % np.timedelta64("NaT", "ns")
        assert result is NaT

    def test_mod_timedelta64(self):
        # GH#19365
        td = Timedelta(hours=37)

        result = td % np.timedelta64(2, "h")
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=1)

    def test_mod_offset(self):
        # GH#19365
        td = Timedelta(hours=37)

        result = td % offsets.Hour(5)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=2)

    def test_mod_numeric(self):
        # GH#19365
        td = Timedelta(hours=37)

        # Numeric Others
        result = td % 2
        assert isinstance(result, Timedelta)
        assert result == Timedelta(0)

        result = td % 1e12
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=3, seconds=20)

        result = td % int(1e12)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=3, seconds=20)

    def test_mod_invalid(self):
        # GH#19365
        td = Timedelta(hours=37)

        with pytest.raises(TypeError):
            td % Timestamp("2018-01-22")

        with pytest.raises(TypeError):
            td % []

    def test_rmod_pytimedelta(self):
        # GH#19365
        td = Timedelta(minutes=3)

        result = timedelta(minutes=4) % td
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=1)

    def test_rmod_timedelta64(self):
        # GH#19365
        td = Timedelta(minutes=3)
        result = np.timedelta64(5, "m") % td
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=2)

    def test_rmod_invalid(self):
        # GH#19365
        td = Timedelta(minutes=3)

        with pytest.raises(TypeError):
            Timestamp("2018-01-22") % td

        with pytest.raises(TypeError):
            15 % td

        with pytest.raises(TypeError):
            16.0 % td

        with pytest.raises(TypeError):
            np.array([22, 24]) % td

    # ----------------------------------------------------------------
    # Timedelta.__divmod__, __rdivmod__

    def test_divmod_numeric(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, 53 * 3600 * 1e9)
        assert result[0] == Timedelta(1, unit="ns")
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=1)

        assert result
        result = divmod(td, np.nan)
        assert result[0] is NaT
        assert result[1] is NaT

    def test_divmod(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, timedelta(days=1))
        assert result[0] == 2
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=6)

        result = divmod(td, 54)
        assert result[0] == Timedelta(hours=1)
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(0)

        result = divmod(td, NaT)
        assert np.isnan(result[0])
        assert result[1] is NaT

    def test_divmod_offset(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, offsets.Hour(-4))
        assert result[0] == -14
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=-2)

    def test_divmod_invalid(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        with pytest.raises(TypeError):
            divmod(td, Timestamp("2018-01-22"))

    def test_rdivmod_pytimedelta(self):
        # GH#19365
        result = divmod(timedelta(days=2, hours=6), Timedelta(days=1))
        assert result[0] == 2
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=6)

    def test_rdivmod_offset(self):
        result = divmod(offsets.Hour(54), Timedelta(hours=-4))
        assert result[0] == -14
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=-2)

    def test_rdivmod_invalid(self):
        # GH#19365
        td = Timedelta(minutes=3)

        with pytest.raises(TypeError):
            divmod(Timestamp("2018-01-22"), td)

        with pytest.raises(TypeError):
            divmod(15, td)

        with pytest.raises(TypeError):
            divmod(16.0, td)

        with pytest.raises(TypeError):
            divmod(np.array([22, 24]), td)

    # ----------------------------------------------------------------

    @pytest.mark.parametrize(
        "op", [operator.mul, ops.rmul, operator.truediv, ops.rdiv, ops.rsub]
    )
    @pytest.mark.parametrize(
        "arr",
        [
            np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]),
            np.array([Timestamp.now(), Timedelta("1D")]),
        ],
    )
    def test_td_op_timedelta_timedeltalike_array(self, op, arr):
        with pytest.raises(TypeError):
            op(arr, Timedelta("1D"))
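
A quick interactive-style summary of the arithmetic exercised by the class above; the expected values follow directly from the assertions in those tests.

import numpy as np
from pandas import Timedelta

td = Timedelta(hours=3, minutes=4)
print(td * 2)                          # Timedelta('0 days 06:08:00')
print(td / Timedelta(hours=1))         # 3.0666... (a plain float)
print(td // Timedelta(hours=1))        # 3
print(td % Timedelta(hours=1))         # Timedelta('0 days 00:04:00')
print(divmod(td, Timedelta(hours=1)))  # (3, Timedelta('0 days 00:04:00'))
print(td * np.nan)                     # NaT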
Example #60
class TestContains:
    def test_contains(self):

        ci = CategoricalIndex(list("aabbca"),
                              categories=list("cabdef"),
                              ordered=False)

        assert "a" in ci
        assert "z" not in ci
        assert "e" not in ci
        assert np.nan not in ci

        # assert codes NOT in index
        assert 0 not in ci
        assert 1 not in ci

    def test_contains_nan(self):
        ci = CategoricalIndex(list("aabbca") + [np.nan],
                              categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]
        ci = CategoricalIndex(dti)

        obj = ci
        if unwrap:
            obj = ci._data

        assert np.nan in obj
        assert None in obj
        assert pd.NaT in obj
        assert np.datetime64("NaT") in obj
        assert np.timedelta64("NaT") not in obj

        obj2 = CategoricalIndex(tdi)
        if unwrap:
            obj2 = obj2._data

        assert np.nan in obj2
        assert None in obj2
        assert pd.NaT in obj2
        assert np.datetime64("NaT") not in obj2
        assert np.timedelta64("NaT") in obj2

        obj3 = CategoricalIndex(pi)
        if unwrap:
            obj3 = obj3._data

        assert np.nan in obj3
        assert None in obj3
        assert pd.NaT in obj3
        assert np.datetime64("NaT") not in obj3
        assert np.timedelta64("NaT") not in obj3

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        result = item in ci
        assert result is expected

    def test_contains_list(self):
        # GH#21729
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx