Example #1
File: ops.py Project: pydata/pandas
    def convert_values(self):
        """Convert datetimes to a comparable value in an expression.
        """
        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded,
                                  encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        lhs, rhs = self.lhs, self.rhs

        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
            v = rhs.value
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = Timestamp(_ensure_decoded(v))
            if v.tz is not None:
                v = v.tz_convert('UTC')
            self.rhs.update(v)

        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
            v = lhs.value
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = Timestamp(_ensure_decoded(v))
            if v.tz is not None:
                v = v.tz_convert('UTC')
            self.lhs.update(v)
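A minimal usage sketch (not part of the pandas source above; assumes only the public pandas API): the situation convert_values normalizes is a datetime column compared against a scalar inside an expression, for example in DataFrame.query.

import pandas as pd

# Illustrative only: the right-hand scalar string is parsed to a Timestamp
# before the comparison against the datetime column is evaluated.
df = pd.DataFrame({"ts": pd.date_range("2011-01-01", periods=3),
                   "x": [1, 2, 3]})
print(df.query("ts > '2011-01-02'"))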
Example #2
    def convert_value(self, v):
        """ convert the expression that is in the term to something that is
        accepted by pytables """

        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded,
                                  encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == 'datetime64' or kind == 'datetime':
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert('UTC')
            return TermValue(v, v.value, kind)
        elif kind == 'timedelta64' or kind == 'timedelta':
            v = Timedelta(v, unit='s').value
            return TermValue(int(v), v, kind)
        elif meta == 'category':
            metadata = com.values_from_object(self.metadata)
            result = metadata.searchsorted(v, side='left')

            # result returns 0 if v is first element or if v is not in metadata
            # check that metadata contains v
            if not result and v not in metadata:
                result = -1
            return TermValue(result, result, 'integer')
        elif kind == 'integer':
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == 'float':
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == 'bool':
            if isinstance(v, str):
                v = v.strip().lower() not in [
                    'false', 'f', 'no', 'n', 'none', '0', '[]', '{}', '']
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), 'string')
        else:
            raise TypeError("Cannot compare {v} of type {typ} to {kind} column"
                            .format(v=v, typ=type(v), kind=kind))
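A minimal sketch (illustrative only, not from the file above) of the integer nanosecond representation that convert_value stores for datetime64 and timedelta64 terms:

import pandas as pd

# Timestamp.value and Timedelta.value are nanoseconds since the Unix epoch,
# which is what TermValue carries for the datetime64/timedelta64 kinds.
ts = pd.Timestamp("2011-01-01 09:00", tz="US/Eastern").tz_convert("UTC")
print(ts.value)                          # integer nanoseconds (UTC)
print(pd.Timedelta(30, unit="s").value)  # 30000000000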
Example #3
    def test_rollforward(self, offset_types):
        expecteds = self.expecteds.copy()

        # result will not be changed if the target is on the offset
        no_changes = [
            "Day",
            "MonthBegin",
            "SemiMonthBegin",
            "YearBegin",
            "Week",
            "Hour",
            "Minute",
            "Second",
            "Milli",
            "Micro",
            "Nano",
            "DateOffset",
        ]
        for n in no_changes:
            expecteds[n] = Timestamp("2011/01/01 09:00")

        expecteds["BusinessHour"] = Timestamp("2011-01-03 09:00:00")
        expecteds["CustomBusinessHour"] = Timestamp("2011-01-03 09:00:00")

        # but will be changed when normalize=True
        norm_expected = expecteds.copy()
        for k in norm_expected:
            norm_expected[k] = Timestamp(norm_expected[k].date())

        normalized = {
            "Day": Timestamp("2011-01-02 00:00:00"),
            "DateOffset": Timestamp("2011-01-02 00:00:00"),
            "MonthBegin": Timestamp("2011-02-01 00:00:00"),
            "SemiMonthBegin": Timestamp("2011-01-15 00:00:00"),
            "YearBegin": Timestamp("2012-01-01 00:00:00"),
            "Week": Timestamp("2011-01-08 00:00:00"),
            "Hour": Timestamp("2011-01-01 00:00:00"),
            "Minute": Timestamp("2011-01-01 00:00:00"),
            "Second": Timestamp("2011-01-01 00:00:00"),
            "Milli": Timestamp("2011-01-01 00:00:00"),
            "Micro": Timestamp("2011-01-01 00:00:00"),
        }
        norm_expected.update(normalized)

        sdt = datetime(2011, 1, 1, 9, 0)
        ndt = np.datetime64("2011-01-01 09:00")

        for dt in [sdt, ndt]:
            expected = expecteds[offset_types.__name__]
            self._check_offsetfunc_works(offset_types, "rollforward", dt,
                                         expected)
            expected = norm_expected[offset_types.__name__]
            self._check_offsetfunc_works(offset_types,
                                         "rollforward",
                                         dt,
                                         expected,
                                         normalize=True)
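A minimal sketch (illustrative only) of the rollforward semantics these expectations encode, together with its counterpart rollback; 2011-01-01 09:00 falls on a Saturday.

import pandas as pd

ts = pd.Timestamp("2011-01-01 09:00")
print(pd.offsets.BusinessDay().rollforward(ts))  # 2011-01-03 09:00:00 (Monday)
print(pd.offsets.BusinessDay().rollback(ts))     # 2010-12-31 09:00:00 (Friday)
print(pd.offsets.MonthBegin().rollforward(ts))   # unchanged, already on the offset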
Example #4
class TestCommon(Base):
    # expected results when the offsets created by Base._get_offset
    # are applied to 2011/01/01 09:00 (Saturday)
    # used for .apply and .rollforward
    expecteds = {
        "Day": Timestamp("2011-01-02 09:00:00"),
        "DateOffset": Timestamp("2011-01-02 09:00:00"),
        "BusinessDay": Timestamp("2011-01-03 09:00:00"),
        "CustomBusinessDay": Timestamp("2011-01-03 09:00:00"),
        "CustomBusinessMonthEnd": Timestamp("2011-01-31 09:00:00"),
        "CustomBusinessMonthBegin": Timestamp("2011-01-03 09:00:00"),
        "MonthBegin": Timestamp("2011-02-01 09:00:00"),
        "BusinessMonthBegin": Timestamp("2011-01-03 09:00:00"),
        "MonthEnd": Timestamp("2011-01-31 09:00:00"),
        "SemiMonthEnd": Timestamp("2011-01-15 09:00:00"),
        "SemiMonthBegin": Timestamp("2011-01-15 09:00:00"),
        "BusinessMonthEnd": Timestamp("2011-01-31 09:00:00"),
        "YearBegin": Timestamp("2012-01-01 09:00:00"),
        "BYearBegin": Timestamp("2011-01-03 09:00:00"),
        "YearEnd": Timestamp("2011-12-31 09:00:00"),
        "BYearEnd": Timestamp("2011-12-30 09:00:00"),
        "QuarterBegin": Timestamp("2011-03-01 09:00:00"),
        "BQuarterBegin": Timestamp("2011-03-01 09:00:00"),
        "QuarterEnd": Timestamp("2011-03-31 09:00:00"),
        "BQuarterEnd": Timestamp("2011-03-31 09:00:00"),
        "BusinessHour": Timestamp("2011-01-03 10:00:00"),
        "CustomBusinessHour": Timestamp("2011-01-03 10:00:00"),
        "WeekOfMonth": Timestamp("2011-01-08 09:00:00"),
        "LastWeekOfMonth": Timestamp("2011-01-29 09:00:00"),
        "FY5253Quarter": Timestamp("2011-01-25 09:00:00"),
        "FY5253": Timestamp("2011-01-25 09:00:00"),
        "Week": Timestamp("2011-01-08 09:00:00"),
        "Easter": Timestamp("2011-04-24 09:00:00"),
        "Hour": Timestamp("2011-01-01 10:00:00"),
        "Minute": Timestamp("2011-01-01 09:01:00"),
        "Second": Timestamp("2011-01-01 09:00:01"),
        "Milli": Timestamp("2011-01-01 09:00:00.001000"),
        "Micro": Timestamp("2011-01-01 09:00:00.000001"),
        "Nano":
        Timestamp(np_datetime64_compat("2011-01-01T09:00:00.000000001Z")),
    }

    def test_immutable(self, offset_types):
        # GH#21341 check that __setattr__ raises
        offset = self._get_offset(offset_types)
        msg = "objects is not writable|DateOffset objects are immutable"
        with pytest.raises(AttributeError, match=msg):
            offset.normalize = True
        with pytest.raises(AttributeError, match=msg):
            offset.n = 91

    def test_return_type(self, offset_types):
        offset = self._get_offset(offset_types)

        # make sure that we are returning a Timestamp
        result = Timestamp("20080101") + offset
        assert isinstance(result, Timestamp)

        # make sure that we are returning NaT
        assert NaT + offset is NaT
        assert offset + NaT is NaT

        assert NaT - offset is NaT
        assert (-offset).apply(NaT) is NaT

    def test_offset_n(self, offset_types):
        offset = self._get_offset(offset_types)
        assert offset.n == 1

        neg_offset = offset * -1
        assert neg_offset.n == -1

        mul_offset = offset * 3
        assert mul_offset.n == 3

    def test_offset_timedelta64_arg(self, offset_types):
        # check that offset._validate_n raises TypeError on a timedelta64
        #  object
        off = self._get_offset(offset_types)

        td64 = np.timedelta64(4567, "s")
        with pytest.raises(TypeError, match="argument must be an integer"):
            type(off)(n=td64, **off.kwds)

    def test_offset_mul_ndarray(self, offset_types):
        off = self._get_offset(offset_types)

        expected = np.array([[off, off * 2], [off * 3, off * 4]])

        result = np.array([[1, 2], [3, 4]]) * off
        tm.assert_numpy_array_equal(result, expected)

        result = off * np.array([[1, 2], [3, 4]])
        tm.assert_numpy_array_equal(result, expected)

    def test_offset_freqstr(self, offset_types):
        offset = self._get_offset(offset_types)

        freqstr = offset.freqstr
        if freqstr not in ("<Easter>", "<DateOffset: days=1>", "LWOM-SAT"):
            code = _get_offset(freqstr)
            assert offset.rule_code == code

    def _check_offsetfunc_works(self,
                                offset,
                                funcname,
                                dt,
                                expected,
                                normalize=False):

        if normalize and issubclass(offset, Tick):
            # normalize=True disallowed for Tick subclasses GH#21427
            return

        offset_s = self._get_offset(offset, normalize=normalize)
        func = getattr(offset_s, funcname)

        result = func(dt)
        assert isinstance(result, Timestamp)
        assert result == expected

        result = func(Timestamp(dt))
        assert isinstance(result, Timestamp)
        assert result == expected

        # see gh-14101
        exp_warning = None
        ts = Timestamp(dt) + Nano(5)

        if (type(offset_s).__name__ == "DateOffset"
                and (funcname == "apply" or normalize) and ts.nanosecond > 0):
            exp_warning = UserWarning

        # test nanosecond is preserved
        with tm.assert_produces_warning(exp_warning):
            result = func(ts)
        assert isinstance(result, Timestamp)
        if normalize is False:
            assert result == expected + Nano(5)
        else:
            assert result == expected

        if isinstance(dt, np.datetime64):
            # the tz checks below only apply to datetime/Timestamp inputs
            return

        for tz in self.timezones:
            expected_localize = expected.tz_localize(tz)
            tz_obj = timezones.maybe_get_tz(tz)
            dt_tz = conversion.localize_pydatetime(dt, tz_obj)

            result = func(dt_tz)
            assert isinstance(result, Timestamp)
            assert result == expected_localize

            result = func(Timestamp(dt, tz=tz))
            assert isinstance(result, Timestamp)
            assert result == expected_localize

            # see gh-14101
            exp_warning = None
            ts = Timestamp(dt, tz=tz) + Nano(5)

            if (type(offset_s).__name__ == "DateOffset"
                    and (funcname == "apply" or normalize)
                    and ts.nanosecond > 0):
                exp_warning = UserWarning

            # test nanosecond is preserved
            with tm.assert_produces_warning(exp_warning):
                result = func(ts)
            assert isinstance(result, Timestamp)
            if normalize is False:
                assert result == expected_localize + Nano(5)
            else:
                assert result == expected_localize

    def test_apply(self, offset_types):
        sdt = datetime(2011, 1, 1, 9, 0)
        ndt = np_datetime64_compat("2011-01-01 09:00Z")

        for dt in [sdt, ndt]:
            expected = self.expecteds[offset_types.__name__]
            self._check_offsetfunc_works(offset_types, "apply", dt, expected)

            expected = Timestamp(expected.date())
            self._check_offsetfunc_works(offset_types,
                                         "apply",
                                         dt,
                                         expected,
                                         normalize=True)

    def test_rollforward(self, offset_types):
        expecteds = self.expecteds.copy()

        # result will not be changed if the target is on the offset
        no_changes = [
            "Day",
            "MonthBegin",
            "SemiMonthBegin",
            "YearBegin",
            "Week",
            "Hour",
            "Minute",
            "Second",
            "Milli",
            "Micro",
            "Nano",
            "DateOffset",
        ]
        for n in no_changes:
            expecteds[n] = Timestamp("2011/01/01 09:00")

        expecteds["BusinessHour"] = Timestamp("2011-01-03 09:00:00")
        expecteds["CustomBusinessHour"] = Timestamp("2011-01-03 09:00:00")

        # but will be changed when normalize=True
        norm_expected = expecteds.copy()
        for k in norm_expected:
            norm_expected[k] = Timestamp(norm_expected[k].date())

        normalized = {
            "Day": Timestamp("2011-01-02 00:00:00"),
            "DateOffset": Timestamp("2011-01-02 00:00:00"),
            "MonthBegin": Timestamp("2011-02-01 00:00:00"),
            "SemiMonthBegin": Timestamp("2011-01-15 00:00:00"),
            "YearBegin": Timestamp("2012-01-01 00:00:00"),
            "Week": Timestamp("2011-01-08 00:00:00"),
            "Hour": Timestamp("2011-01-01 00:00:00"),
            "Minute": Timestamp("2011-01-01 00:00:00"),
            "Second": Timestamp("2011-01-01 00:00:00"),
            "Milli": Timestamp("2011-01-01 00:00:00"),
            "Micro": Timestamp("2011-01-01 00:00:00"),
        }
        norm_expected.update(normalized)

        sdt = datetime(2011, 1, 1, 9, 0)
        ndt = np_datetime64_compat("2011-01-01 09:00Z")

        for dt in [sdt, ndt]:
            expected = expecteds[offset_types.__name__]
            self._check_offsetfunc_works(offset_types, "rollforward", dt,
                                         expected)
            expected = norm_expected[offset_types.__name__]
            self._check_offsetfunc_works(offset_types,
                                         "rollforward",
                                         dt,
                                         expected,
                                         normalize=True)

    def test_rollback(self, offset_types):
        expecteds = {
            "BusinessDay": Timestamp("2010-12-31 09:00:00"),
            "CustomBusinessDay": Timestamp("2010-12-31 09:00:00"),
            "CustomBusinessMonthEnd": Timestamp("2010-12-31 09:00:00"),
            "CustomBusinessMonthBegin": Timestamp("2010-12-01 09:00:00"),
            "BusinessMonthBegin": Timestamp("2010-12-01 09:00:00"),
            "MonthEnd": Timestamp("2010-12-31 09:00:00"),
            "SemiMonthEnd": Timestamp("2010-12-31 09:00:00"),
            "BusinessMonthEnd": Timestamp("2010-12-31 09:00:00"),
            "BYearBegin": Timestamp("2010-01-01 09:00:00"),
            "YearEnd": Timestamp("2010-12-31 09:00:00"),
            "BYearEnd": Timestamp("2010-12-31 09:00:00"),
            "QuarterBegin": Timestamp("2010-12-01 09:00:00"),
            "BQuarterBegin": Timestamp("2010-12-01 09:00:00"),
            "QuarterEnd": Timestamp("2010-12-31 09:00:00"),
            "BQuarterEnd": Timestamp("2010-12-31 09:00:00"),
            "BusinessHour": Timestamp("2010-12-31 17:00:00"),
            "CustomBusinessHour": Timestamp("2010-12-31 17:00:00"),
            "WeekOfMonth": Timestamp("2010-12-11 09:00:00"),
            "LastWeekOfMonth": Timestamp("2010-12-25 09:00:00"),
            "FY5253Quarter": Timestamp("2010-10-26 09:00:00"),
            "FY5253": Timestamp("2010-01-26 09:00:00"),
            "Easter": Timestamp("2010-04-04 09:00:00"),
        }

        # result will not be changed if the target is on the offset
        for n in [
                "Day",
                "MonthBegin",
                "SemiMonthBegin",
                "YearBegin",
                "Week",
                "Hour",
                "Minute",
                "Second",
                "Milli",
                "Micro",
                "Nano",
                "DateOffset",
        ]:
            expecteds[n] = Timestamp("2011/01/01 09:00")

        # but will be changed when normalize=True
        norm_expected = expecteds.copy()
        for k in norm_expected:
            norm_expected[k] = Timestamp(norm_expected[k].date())

        normalized = {
            "Day": Timestamp("2010-12-31 00:00:00"),
            "DateOffset": Timestamp("2010-12-31 00:00:00"),
            "MonthBegin": Timestamp("2010-12-01 00:00:00"),
            "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"),
            "YearBegin": Timestamp("2010-01-01 00:00:00"),
            "Week": Timestamp("2010-12-25 00:00:00"),
            "Hour": Timestamp("2011-01-01 00:00:00"),
            "Minute": Timestamp("2011-01-01 00:00:00"),
            "Second": Timestamp("2011-01-01 00:00:00"),
            "Milli": Timestamp("2011-01-01 00:00:00"),
            "Micro": Timestamp("2011-01-01 00:00:00"),
        }
        norm_expected.update(normalized)

        sdt = datetime(2011, 1, 1, 9, 0)
        ndt = np_datetime64_compat("2011-01-01 09:00Z")

        for dt in [sdt, ndt]:
            expected = expecteds[offset_types.__name__]
            self._check_offsetfunc_works(offset_types, "rollback", dt,
                                         expected)

            expected = norm_expected[offset_types.__name__]
            self._check_offsetfunc_works(offset_types,
                                         "rollback",
                                         dt,
                                         expected,
                                         normalize=True)

    def test_is_on_offset(self, offset_types):
        dt = self.expecteds[offset_types.__name__]
        offset_s = self._get_offset(offset_types)
        assert offset_s.is_on_offset(dt)

        # when normalize=True, is_on_offset checks time is 00:00:00
        if issubclass(offset_types, Tick):
            # normalize=True disallowed for Tick subclasses GH#21427
            return
        offset_n = self._get_offset(offset_types, normalize=True)
        assert not offset_n.is_on_offset(dt)

        if offset_types in (BusinessHour, CustomBusinessHour):
            # In default BusinessHour (9:00-17:00), normalized time
            # cannot be in business hour range
            return
        date = datetime(dt.year, dt.month, dt.day)
        assert offset_n.is_on_offset(date)

    def test_add(self, offset_types, tz_naive_fixture):
        tz = tz_naive_fixture
        dt = datetime(2011, 1, 1, 9, 0)

        offset_s = self._get_offset(offset_types)
        expected = self.expecteds[offset_types.__name__]

        result_dt = dt + offset_s
        result_ts = Timestamp(dt) + offset_s
        for result in [result_dt, result_ts]:
            assert isinstance(result, Timestamp)
            assert result == expected

        expected_localize = expected.tz_localize(tz)
        result = Timestamp(dt, tz=tz) + offset_s
        assert isinstance(result, Timestamp)
        assert result == expected_localize

        # normalize=True, disallowed for Tick subclasses GH#21427
        if issubclass(offset_types, Tick):
            return
        offset_s = self._get_offset(offset_types, normalize=True)
        expected = Timestamp(expected.date())

        result_dt = dt + offset_s
        result_ts = Timestamp(dt) + offset_s
        for result in [result_dt, result_ts]:
            assert isinstance(result, Timestamp)
            assert result == expected

        expected_localize = expected.tz_localize(tz)
        result = Timestamp(dt, tz=tz) + offset_s
        assert isinstance(result, Timestamp)
        assert result == expected_localize

    def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture):
        # GH#12724, GH#30336
        offset_s = self._get_offset(offset_types)

        dti = DatetimeIndex([], tz=tz_naive_fixture)

        warn = None
        if isinstance(
                offset_s,
            (
                Easter,
                WeekOfMonth,
                LastWeekOfMonth,
                CustomBusinessDay,
                BusinessHour,
                CustomBusinessHour,
                CustomBusinessMonthBegin,
                CustomBusinessMonthEnd,
                FY5253,
                FY5253Quarter,
            ),
        ):
            # We don't have an optimized apply_index
            warn = PerformanceWarning

        with tm.assert_produces_warning(warn):
            result = dti + offset_s
        tm.assert_index_equal(result, dti)
        with tm.assert_produces_warning(warn):
            result = offset_s + dti
        tm.assert_index_equal(result, dti)

        dta = dti._data
        with tm.assert_produces_warning(warn):
            result = dta + offset_s
        tm.assert_equal(result, dta)
        with tm.assert_produces_warning(warn):
            result = offset_s + dta
        tm.assert_equal(result, dta)

    def test_pickle_roundtrip(self, offset_types):
        off = self._get_offset(offset_types)
        res = tm.round_trip_pickle(off)
        assert off == res
        if type(off) is not DateOffset:
            for attr in off._attributes:
                if attr == "calendar":
                    # np.busdaycalendar __eq__ will return False;
                    #  we check the holidays and weekmask attrs instead, so this is OK
                    continue
                # Make sure nothing got lost from _params (which __eq__ is based on)
                assert getattr(off, attr) == getattr(res, attr)

    def test_pickle_dateoffset_odd_inputs(self):
        # GH#34511
        off = DateOffset(months=12)
        res = tm.round_trip_pickle(off)
        assert off == res

        base_dt = datetime(2020, 1, 1)
        assert base_dt + off == base_dt + res

    def test_onOffset_deprecated(self, offset_types):
        # GH#30340 use idiomatic naming
        off = self._get_offset(offset_types)

        ts = Timestamp.now()
        with tm.assert_produces_warning(FutureWarning):
            result = off.onOffset(ts)

        expected = off.is_on_offset(ts)
        assert result == expected

    def test_isAnchored_deprecated(self, offset_types):
        # GH#30340 use idiomatic naming
        off = self._get_offset(offset_types)

        with tm.assert_produces_warning(FutureWarning):
            result = off.isAnchored()

        expected = off.is_anchored()
        assert result == expected

    def test_offsets_hashable(self, offset_types):
        # GH: 37267
        off = self._get_offset(offset_types)
        assert hash(off) is not None
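A minimal sketch (illustrative only) of the apply expectations in TestCommon.expecteds: adding an offset to 2011-01-01 09:00 (Saturday), with and without normalize=True.

import pandas as pd

ts = pd.Timestamp("2011-01-01 09:00")
print(ts + pd.offsets.MonthEnd())                # 2011-01-31 09:00:00
print(ts + pd.offsets.MonthEnd(normalize=True))  # 2011-01-31 00:00:00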
Example #5
    def _check_offsetfunc_works(self,
                                offset,
                                funcname,
                                dt,
                                expected,
                                normalize=False):

        if normalize and issubclass(offset, Tick):
            # normalize=True disallowed for Tick subclasses GH#21427
            return

        offset_s = self._get_offset(offset, normalize=normalize)
        func = getattr(offset_s, funcname)

        result = func(dt)
        assert isinstance(result, Timestamp)
        assert result == expected

        result = func(Timestamp(dt))
        assert isinstance(result, Timestamp)
        assert result == expected

        # see gh-14101
        exp_warning = None
        ts = Timestamp(dt) + Nano(5)

        if (type(offset_s).__name__ == "DateOffset"
                and (funcname == "apply" or normalize) and ts.nanosecond > 0):
            exp_warning = UserWarning

        # test nanosecond is preserved
        with tm.assert_produces_warning(exp_warning):
            result = func(ts)
        assert isinstance(result, Timestamp)
        if normalize is False:
            assert result == expected + Nano(5)
        else:
            assert result == expected

        if isinstance(dt, np.datetime64):
            # the tz checks below only apply to datetime/Timestamp inputs
            return

        for tz in self.timezones:
            expected_localize = expected.tz_localize(tz)
            tz_obj = timezones.maybe_get_tz(tz)
            dt_tz = conversion.localize_pydatetime(dt, tz_obj)

            result = func(dt_tz)
            assert isinstance(result, Timestamp)
            assert result == expected_localize

            result = func(Timestamp(dt, tz=tz))
            assert isinstance(result, Timestamp)
            assert result == expected_localize

            # see gh-14101
            exp_warning = None
            ts = Timestamp(dt, tz=tz) + Nano(5)

            if (type(offset_s).__name__ == "DateOffset"
                    and (funcname == "apply" or normalize)
                    and ts.nanosecond > 0):
                exp_warning = UserWarning

            # test nanosecond is preserved
            with tm.assert_produces_warning(exp_warning):
                result = func(ts)
            assert isinstance(result, Timestamp)
            if normalize is False:
                assert result == expected_localize + Nano(5)
            else:
                assert result == expected_localize
Example #6
def generate_regular_range(start: Timestamp, end: Timestamp, periods: int,
                           freq: DateOffset) -> Tuple[np.ndarray, str]:
    """
    Generate a range of dates with the spans between dates described by
    the given `freq` DateOffset.

    Parameters
    ----------
    start : Timestamp or None
        first point of produced date range
    end : Timestamp or None
        last point of produced date range
    periods : int
        number of periods in produced date range
    freq : DateOffset
        describes space between dates in produced date range

    Returns
    -------
    values : ndarray[np.int64]
        nanosecond unix timestamps
    tz : tzinfo or None
        timezone inferred from `start` or `end`
    """
    if isinstance(freq, Tick):
        stride = freq.nanos
        if periods is None:
            b = Timestamp(start).value
            # cannot just use e = Timestamp(end) + 1 because arange breaks when
            # stride is too large, see GH10887
            e = b + (Timestamp(end).value -
                     b) // stride * stride + stride // 2 + 1
            # end.tz == start.tz by this point due to _generate implementation
            tz = start.tz
        elif start is not None:
            b = Timestamp(start).value
            e = _generate_range_overflow_safe(b, periods, stride, side="start")
            tz = start.tz
        elif end is not None:
            e = Timestamp(end).value + stride
            b = _generate_range_overflow_safe(e, periods, stride, side="end")
            tz = end.tz
        else:
            raise ValueError("at least 'start' or 'end' should be specified "
                             "if a 'period' is given.")

        with np.errstate(over="raise"):
            # If the range is sufficiently large, np.arange may overflow
            #  and incorrectly return an empty array if not caught.
            try:
                values = np.arange(b, e, stride, dtype=np.int64)
            except FloatingPointError:
                xdr = [b]
                while xdr[-1] != e:
                    xdr.append(xdr[-1] + stride)
                values = np.array(xdr[:-1], dtype=np.int64)

    else:
        tz = None
        # start and end should have the same timezone by this point
        if start is not None:
            tz = start.tz
        elif end is not None:
            tz = end.tz

        xdr = generate_range(start=start,
                             end=end,
                             periods=periods,
                             offset=freq)

        values = np.array([x.value for x in xdr], dtype=np.int64)

    return values, tz
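A minimal sketch (illustrative only, public API) of the user-facing counterpart: date_range with a Tick frequency yields the evenly spaced nanosecond values this helper computes.

import pandas as pd

dti = pd.date_range(start="2020-01-01", end="2020-01-02", freq="6H")
print(dti.asi8)  # int64 nanosecond unix timestamps, stride == 6 * 3600 * 10**9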
Example #7
def rep_stamp(self):
    return Timestamp(self.values[0])
class TestCustomBusinessHour(Base):
    _offset = CustomBusinessHour
    holidays = [
        "2014-06-27",
        datetime(2014, 6, 30),
        np.datetime64("2014-07-02")
    ]

    def setup_method(self, method):
        # 2014 Calendar to check custom holidays
        #   Sun Mon Tue Wed Thu Fri Sat
        #  6/22  23  24  25  26  27  28
        #    29  30 7/1   2   3   4   5
        #     6   7   8   9  10  11  12
        self.d = datetime(2014, 7, 1, 10, 00)
        self.offset1 = CustomBusinessHour(weekmask="Tue Wed Thu Fri")

        self.offset2 = CustomBusinessHour(holidays=self.holidays)

    def test_constructor_errors(self):
        from datetime import time as dt_time

        msg = "time data must be specified only with hour and minute"
        with pytest.raises(ValueError, match=msg):
            CustomBusinessHour(start=dt_time(11, 0, 5))
        msg = "time data must match '%H:%M' format"
        with pytest.raises(ValueError, match=msg):
            CustomBusinessHour(start="AAA")
        msg = "time data must match '%H:%M' format"
        with pytest.raises(ValueError, match=msg):
            CustomBusinessHour(start="14:00:05")

    def test_different_normalize_equals(self):
        # GH#21404 changed __eq__ to return False when `normalize` does not match
        offset = self._offset()
        offset2 = self._offset(normalize=True)
        assert offset != offset2

    def test_repr(self):
        assert repr(self.offset1) == "<CustomBusinessHour: CBH=09:00-17:00>"
        assert repr(self.offset2) == "<CustomBusinessHour: CBH=09:00-17:00>"

    def test_with_offset(self):
        expected = Timestamp("2014-07-01 13:00")

        assert self.d + CustomBusinessHour() * 3 == expected
        assert self.d + CustomBusinessHour(n=3) == expected

    def test_eq(self):
        for offset in [self.offset1, self.offset2]:
            assert offset == offset

        assert CustomBusinessHour() != CustomBusinessHour(-1)
        assert CustomBusinessHour(start="09:00") == CustomBusinessHour()
        assert CustomBusinessHour(start="09:00") != CustomBusinessHour(
            start="09:01")
        assert CustomBusinessHour(
            start="09:00", end="17:00") != CustomBusinessHour(start="17:00",
                                                              end="09:01")

        assert CustomBusinessHour(
            weekmask="Tue Wed Thu Fri") != CustomBusinessHour(
                weekmask="Mon Tue Wed Thu Fri")
        assert CustomBusinessHour(
            holidays=["2014-06-27"]) != CustomBusinessHour(
                holidays=["2014-06-28"])

    def test_sub(self):
        # override the Base.test_sub implementation because self.offset2 is
        # defined differently in this class than the test expects
        pass

    def test_hash(self):
        assert hash(self.offset1) == hash(self.offset1)
        assert hash(self.offset2) == hash(self.offset2)

    def test_call(self):
        with tm.assert_produces_warning(FutureWarning):
            # GH#34171 DateOffset.__call__ is deprecated
            assert self.offset1(self.d) == datetime(2014, 7, 1, 11)
            assert self.offset2(self.d) == datetime(2014, 7, 1, 11)

    def testRollback1(self):
        assert self.offset1.rollback(self.d) == self.d
        assert self.offset2.rollback(self.d) == self.d

        d = datetime(2014, 7, 1, 0)

        # 2014/07/01 is Tuesday, 06/30 is Monday (excluded by offset1's weekmask)
        assert self.offset1.rollback(d) == datetime(2014, 6, 27, 17)

        # 2014/6/30 and 2014/6/27 are holidays
        assert self.offset2.rollback(d) == datetime(2014, 6, 26, 17)

    def testRollback2(self):
        assert self._offset(-3).rollback(datetime(2014, 7, 5, 15,
                                                  0)) == datetime(
                                                      2014, 7, 4, 17, 0)

    def testRollforward1(self):
        assert self.offset1.rollforward(self.d) == self.d
        assert self.offset2.rollforward(self.d) == self.d

        d = datetime(2014, 7, 1, 0)
        assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9)
        assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9)

    def testRollforward2(self):
        assert self._offset(-3).rollforward(datetime(2014, 7, 5, 16,
                                                     0)) == datetime(
                                                         2014, 7, 7, 9)

    def test_roll_date_object(self):
        offset = BusinessHour()

        dt = datetime(2014, 7, 6, 15, 0)

        result = offset.rollback(dt)
        assert result == datetime(2014, 7, 4, 17)

        result = offset.rollforward(dt)
        assert result == datetime(2014, 7, 7, 9)

    normalize_cases = [
        (
            CustomBusinessHour(normalize=True, holidays=holidays),
            {
                datetime(2014, 7, 1, 8): datetime(2014, 7, 1),
                datetime(2014, 7, 1, 17): datetime(2014, 7, 3),
                datetime(2014, 7, 1, 16): datetime(2014, 7, 3),
                datetime(2014, 7, 1, 23): datetime(2014, 7, 3),
                datetime(2014, 7, 1, 0): datetime(2014, 7, 1),
                datetime(2014, 7, 4, 15): datetime(2014, 7, 4),
                datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4),
                datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7),
                datetime(2014, 7, 5, 23): datetime(2014, 7, 7),
                datetime(2014, 7, 6, 10): datetime(2014, 7, 7),
            },
        ),
        (
            CustomBusinessHour(-1, normalize=True, holidays=holidays),
            {
                datetime(2014, 7, 1, 8): datetime(2014, 6, 26),
                datetime(2014, 7, 1, 17): datetime(2014, 7, 1),
                datetime(2014, 7, 1, 16): datetime(2014, 7, 1),
                datetime(2014, 7, 1, 10): datetime(2014, 6, 26),
                datetime(2014, 7, 1, 0): datetime(2014, 6, 26),
                datetime(2014, 7, 7, 10): datetime(2014, 7, 4),
                datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7),
                datetime(2014, 7, 5, 23): datetime(2014, 7, 4),
                datetime(2014, 7, 6, 10): datetime(2014, 7, 4),
            },
        ),
        (
            CustomBusinessHour(1,
                               normalize=True,
                               start="17:00",
                               end="04:00",
                               holidays=holidays),
            {
                datetime(2014, 7, 1, 8): datetime(2014, 7, 1),
                datetime(2014, 7, 1, 17): datetime(2014, 7, 1),
                datetime(2014, 7, 1, 23): datetime(2014, 7, 2),
                datetime(2014, 7, 2, 2): datetime(2014, 7, 2),
                datetime(2014, 7, 2, 3): datetime(2014, 7, 3),
                datetime(2014, 7, 4, 23): datetime(2014, 7, 5),
                datetime(2014, 7, 5, 2): datetime(2014, 7, 5),
                datetime(2014, 7, 7, 2): datetime(2014, 7, 7),
                datetime(2014, 7, 7, 17): datetime(2014, 7, 7),
            },
        ),
    ]

    @pytest.mark.parametrize("norm_cases", normalize_cases)
    def test_normalize(self, norm_cases):
        offset, cases = norm_cases
        for dt, expected in cases.items():
            assert offset.apply(dt) == expected

    def test_is_on_offset(self):
        tests = [(
            CustomBusinessHour(start="10:00",
                               end="15:00",
                               holidays=self.holidays),
            {
                datetime(2014, 7, 1, 9): False,
                datetime(2014, 7, 1, 10): True,
                datetime(2014, 7, 1, 15): True,
                datetime(2014, 7, 1, 15, 1): False,
                datetime(2014, 7, 5, 12): False,
                datetime(2014, 7, 6, 12): False,
            },
        )]

        for offset, cases in tests:
            for dt, expected in cases.items():
                assert offset.is_on_offset(dt) == expected

    apply_cases = [
        (
            CustomBusinessHour(holidays=holidays),
            {
                datetime(2014, 7, 1, 11):
                datetime(2014, 7, 1, 12),
                datetime(2014, 7, 1, 13):
                datetime(2014, 7, 1, 14),
                datetime(2014, 7, 1, 15):
                datetime(2014, 7, 1, 16),
                datetime(2014, 7, 1, 19):
                datetime(2014, 7, 3, 10),
                datetime(2014, 7, 1, 16):
                datetime(2014, 7, 3, 9),
                datetime(2014, 7, 1, 16, 30, 15):
                datetime(2014, 7, 3, 9, 30, 15),
                datetime(2014, 7, 1, 17):
                datetime(2014, 7, 3, 10),
                datetime(2014, 7, 2, 11):
                datetime(2014, 7, 3, 10),
                # out of business hours
                datetime(2014, 7, 2, 8):
                datetime(2014, 7, 3, 10),
                datetime(2014, 7, 2, 19):
                datetime(2014, 7, 3, 10),
                datetime(2014, 7, 2, 23):
                datetime(2014, 7, 3, 10),
                datetime(2014, 7, 3, 0):
                datetime(2014, 7, 3, 10),
                # saturday
                datetime(2014, 7, 5, 15):
                datetime(2014, 7, 7, 10),
                datetime(2014, 7, 4, 17):
                datetime(2014, 7, 7, 10),
                datetime(2014, 7, 4, 16, 30):
                datetime(2014, 7, 7, 9, 30),
                datetime(2014, 7, 4, 16, 30, 30):
                datetime(2014, 7, 7, 9, 30, 30),
            },
        ),
        (
            CustomBusinessHour(4, holidays=holidays),
            {
                datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15),
                datetime(2014, 7, 1, 13): datetime(2014, 7, 3, 9),
                datetime(2014, 7, 1, 15): datetime(2014, 7, 3, 11),
                datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 12),
                datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 13),
                datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 13),
                datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 13),
                datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13),
                datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13),
                datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13),
                datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13),
                datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13),
                datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30),
                datetime(2014, 7, 4, 16, 30, 30):
                datetime(2014, 7, 7, 12, 30, 30),
            },
        ),
    ]

    @pytest.mark.parametrize("apply_case", apply_cases)
    def test_apply(self, apply_case):
        offset, cases = apply_case
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    nano_cases = [
        (
            CustomBusinessHour(holidays=holidays),
            {
                Timestamp("2014-07-01 15:00") + Nano(5):
                Timestamp("2014-07-01 16:00") + Nano(5),
                Timestamp("2014-07-01 16:00") + Nano(5):
                Timestamp("2014-07-03 09:00") + Nano(5),
                Timestamp("2014-07-01 16:00") - Nano(5):
                Timestamp("2014-07-01 17:00") - Nano(5),
            },
        ),
        (
            CustomBusinessHour(-1, holidays=holidays),
            {
                Timestamp("2014-07-01 15:00") + Nano(5):
                Timestamp("2014-07-01 14:00") + Nano(5),
                Timestamp("2014-07-01 10:00") + Nano(5):
                Timestamp("2014-07-01 09:00") + Nano(5),
                Timestamp("2014-07-01 10:00") - Nano(5):
                Timestamp("2014-06-26 17:00") - Nano(5),
            },
        ),
    ]

    @pytest.mark.parametrize("nano_case", nano_cases)
    def test_apply_nanoseconds(self, nano_case):
        offset, cases = nano_case
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)
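A minimal sketch (illustrative only) mirroring the apply_cases above: with 2014-07-02 in the holiday list, one custom business hour added at the 2014-07-01 close lands at the 2014-07-03 open.

import pandas as pd
from pandas.tseries.offsets import CustomBusinessHour

cbh = CustomBusinessHour(holidays=["2014-06-27", "2014-06-30", "2014-07-02"])
print(pd.Timestamp("2014-07-01 16:00") + cbh)  # 2014-07-03 09:00:00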
Example #9
def test_delta_preserve_nanos(self):
    val = Timestamp(1337299200000000123)
    result = val + timedelta(1)
    assert result.nanosecond == val.nanosecond
Example #10
def test_timestamp_add_timedelta64_unit(self, other, expected_difference):
    ts = Timestamp(datetime.utcnow())
    result = ts + other
    valdiff = result.value - ts.value
    assert valdiff == expected_difference
Example #11
class TestTimestampArithmetic:
    def test_overflow_offset(self):
        # no overflow expected

        stamp = Timestamp("2000/1/1")
        offset_no_overflow = to_offset("D") * 100

        expected = Timestamp("2000/04/10")
        assert stamp + offset_no_overflow == expected

        assert offset_no_overflow + stamp == expected

        expected = Timestamp("1999/09/23")
        assert stamp - offset_no_overflow == expected

    def test_overflow_offset_raises(self):
        # xref https://github.com/statsmodels/statsmodels/issues/3374
        # ends up multiplying really large numbers which overflow

        stamp = Timestamp("2017-01-13 00:00:00")
        offset_overflow = 20169940 * offsets.Day(1)
        msg = ("the add operation between "
               r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
               "will overflow")
        lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"

        with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
            stamp + offset_overflow

        with pytest.raises(OverflowError, match=msg):
            offset_overflow + stamp

        with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
            stamp - offset_overflow

        # xref https://github.com/pandas-dev/pandas/issues/14080
        # used to crash, so check for proper overflow exception

        stamp = Timestamp("2000/1/1")
        offset_overflow = to_offset("D") * 100**5

        lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
        with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
            stamp + offset_overflow

        with pytest.raises(OverflowError, match=msg):
            offset_overflow + stamp

        with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
            stamp - offset_overflow

    def test_overflow_timestamp_raises(self):
        # https://github.com/pandas-dev/pandas/issues/31774
        msg = "Result is too large"
        a = Timestamp("2101-01-01 00:00:00")
        b = Timestamp("1688-01-01 00:00:00")

        with pytest.raises(OutOfBoundsDatetime, match=msg):
            a - b

        # but we're OK for timestamp and datetime.datetime
        assert (a - b.to_pydatetime()) == (a.to_pydatetime() - b)

    def test_delta_preserve_nanos(self):
        val = Timestamp(1337299200000000123)
        result = val + timedelta(1)
        assert result.nanosecond == val.nanosecond

    def test_rsub_dtscalars(self, tz_naive_fixture):
        # In particular, check that datetime64 - Timestamp works GH#28286
        td = Timedelta(1235345642000)
        ts = Timestamp("2021-01-01", tz=tz_naive_fixture)
        other = ts + td

        assert other - ts == td
        assert other.to_pydatetime() - ts == td
        if tz_naive_fixture is None:
            assert other.to_datetime64() - ts == td
        else:
            msg = "Cannot subtract tz-naive and tz-aware datetime-like objects"
            with pytest.raises(TypeError, match=msg):
                other.to_datetime64() - ts

    def test_timestamp_sub_datetime(self):
        dt = datetime(2013, 10, 12)
        ts = Timestamp(datetime(2013, 10, 13))
        assert (ts - dt).days == 1
        assert (dt - ts).days == -1

    def test_subtract_tzaware_datetime(self):
        t1 = Timestamp("2020-10-22T22:00:00+00:00")
        t2 = datetime(2020, 10, 22, 22, tzinfo=timezone.utc)

        result = t1 - t2

        assert isinstance(result, Timedelta)
        assert result == Timedelta("0 days")

    def test_subtract_timestamp_from_different_timezone(self):
        t1 = Timestamp("20130101").tz_localize("US/Eastern")
        t2 = Timestamp("20130101").tz_localize("CET")

        result = t1 - t2

        assert isinstance(result, Timedelta)
        assert result == Timedelta("0 days 06:00:00")

    def test_subtracting_involving_datetime_with_different_tz(self):
        t1 = datetime(2013, 1, 1, tzinfo=timezone(timedelta(hours=-5)))
        t2 = Timestamp("20130101").tz_localize("CET")

        result = t1 - t2

        assert isinstance(result, Timedelta)
        assert result == Timedelta("0 days 06:00:00")

        result = t2 - t1
        assert isinstance(result, Timedelta)
        assert result == Timedelta("-1 days +18:00:00")

    def test_subtracting_different_timezones(self, tz_aware_fixture):
        t_raw = Timestamp("20130101")
        t_UTC = t_raw.tz_localize("UTC")
        t_diff = t_UTC.tz_convert(tz_aware_fixture) + Timedelta(
            "0 days 05:00:00")

        result = t_diff - t_UTC

        assert isinstance(result, Timedelta)
        assert result == Timedelta("0 days 05:00:00")

    def test_addition_subtraction_types(self):
        # Assert on the types resulting from Timestamp +/- various date/time
        # objects
        dt = datetime(2014, 3, 4)
        td = timedelta(seconds=1)
        # build a timestamp with a frequency; adding/subtracting plain integers
        # to such a Timestamp used to be allowed but now raises (see GH#22535)
        with tm.assert_produces_warning(FutureWarning,
                                        match="The 'freq' argument"):
            # freq deprecated
            ts = Timestamp(dt, freq="D")

        msg = "Addition/subtraction of integers"
        with pytest.raises(TypeError, match=msg):
            # GH#22535 add/sub with integers is deprecated
            ts + 1
        with pytest.raises(TypeError, match=msg):
            ts - 1

        # Timestamp + datetime is not supported, though subtraction is and
        # yields a Timedelta; more tests in tseries/base/tests/test_base.py
        assert type(ts - dt) == Timedelta
        assert type(ts + td) == Timestamp
        assert type(ts - td) == Timestamp

        # Timestamp +/- datetime64 not supported, so not tested (could possibly
        # assert error raised?)
        td64 = np.timedelta64(1, "D")
        assert type(ts + td64) == Timestamp
        assert type(ts - td64) == Timestamp

    @pytest.mark.parametrize(
        "freq, td, td64",
        [
            ("S", timedelta(seconds=1), np.timedelta64(1, "s")),
            ("min", timedelta(minutes=1), np.timedelta64(1, "m")),
            ("H", timedelta(hours=1), np.timedelta64(1, "h")),
            ("D", timedelta(days=1), np.timedelta64(1, "D")),
            ("W", timedelta(weeks=1), np.timedelta64(1, "W")),
            ("M", None, np.timedelta64(1, "M")),
        ],
    )
    @pytest.mark.filterwarnings(
        "ignore:Timestamp.freq is deprecated:FutureWarning")
    @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning")
    def test_addition_subtraction_preserve_frequency(self, freq, td, td64):
        ts = Timestamp("2014-03-05 00:00:00", freq=freq)
        original_freq = ts.freq

        assert (ts + 1 * original_freq).freq == original_freq
        assert (ts - 1 * original_freq).freq == original_freq

        if td is not None:
            # timedelta does not support months as unit
            assert (ts + td).freq == original_freq
            assert (ts - td).freq == original_freq

        assert (ts + td64).freq == original_freq
        assert (ts - td64).freq == original_freq

    @pytest.mark.parametrize(
        "td", [Timedelta(hours=3),
               np.timedelta64(3, "h"),
               timedelta(hours=3)])
    def test_radd_tdscalar(self, td, fixed_now_ts):
        # GH#24775 timedelta64+Timestamp should not raise
        ts = fixed_now_ts
        assert td + ts == ts + td

    @pytest.mark.parametrize(
        "other,expected_difference",
        [
            (np.timedelta64(-123, "ns"), -123),
            (np.timedelta64(1234567898, "ns"), 1234567898),
            (np.timedelta64(-123, "us"), -123000),
            (np.timedelta64(-123, "ms"), -123000000),
        ],
    )
    def test_timestamp_add_timedelta64_unit(self, other, expected_difference):
        ts = Timestamp(datetime.utcnow())
        result = ts + other
        valdiff = result.value - ts.value
        assert valdiff == expected_difference

    @pytest.mark.parametrize(
        "ts",
        [
            Timestamp("1776-07-04"),
            Timestamp("1776-07-04", tz="UTC"),
        ],
    )
    @pytest.mark.parametrize(
        "other",
        [
            1,
            np.int64(1),
            np.array([1, 2], dtype=np.int32),
            np.array([3, 4], dtype=np.uint64),
        ],
    )
    def test_add_int_with_freq(self, ts, other):
        msg = "Addition/subtraction of integers and integer-arrays"
        with pytest.raises(TypeError, match=msg):
            ts + other
        with pytest.raises(TypeError, match=msg):
            other + ts

        with pytest.raises(TypeError, match=msg):
            ts - other

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            other - ts

    @pytest.mark.parametrize("shape", [(6, ), (2, 3)])
    def test_addsub_m8ndarray(self, shape):
        # GH#33296
        ts = Timestamp("2020-04-04 15:45")
        other = np.arange(6).astype("m8[h]").reshape(shape)

        result = ts + other

        ex_stamps = [ts + Timedelta(hours=n) for n in range(6)]
        expected = np.array([x.asm8 for x in ex_stamps],
                            dtype="M8[ns]").reshape(shape)
        tm.assert_numpy_array_equal(result, expected)

        result = other + ts
        tm.assert_numpy_array_equal(result, expected)

        result = ts - other
        ex_stamps = [ts - Timedelta(hours=n) for n in range(6)]
        expected = np.array([x.asm8 for x in ex_stamps],
                            dtype="M8[ns]").reshape(shape)
        tm.assert_numpy_array_equal(result, expected)

        msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timestamp'"
        with pytest.raises(TypeError, match=msg):
            other - ts

    @pytest.mark.parametrize("shape", [(6, ), (2, 3)])
    def test_addsub_m8ndarray_tzaware(self, shape):
        # GH#33296
        ts = Timestamp("2020-04-04 15:45", tz="US/Pacific")

        other = np.arange(6).astype("m8[h]").reshape(shape)

        result = ts + other

        ex_stamps = [ts + Timedelta(hours=n) for n in range(6)]
        expected = np.array(ex_stamps).reshape(shape)
        tm.assert_numpy_array_equal(result, expected)

        result = other + ts
        tm.assert_numpy_array_equal(result, expected)

        result = ts - other
        ex_stamps = [ts - Timedelta(hours=n) for n in range(6)]
        expected = np.array(ex_stamps).reshape(shape)
        tm.assert_numpy_array_equal(result, expected)

        msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timestamp'"
        with pytest.raises(TypeError, match=msg):
            other - ts

    def test_subtract_different_utc_objects(self, utc_fixture, utc_fixture2):
        # GH 32619
        dt = datetime(2021, 1, 1)
        ts1 = Timestamp(dt, tz=utc_fixture)
        ts2 = Timestamp(dt, tz=utc_fixture2)
        result = ts1 - ts2
        expected = Timedelta(0)
        assert result == expected
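A minimal sketch (illustrative only; exact exception types and messages vary across pandas versions) of the nanosecond bounds behind the overflow tests above:

import pandas as pd

# Results outside this range raise OutOfBoundsDatetime / OutOfBoundsTimedelta
# (or OverflowError, depending on the operation and the pandas version).
print(pd.Timestamp.min)  # around 1677-09-21
print(pd.Timestamp.max)  # around 2262-04-11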
Example #12
def test_timestamp_sub_datetime(self):
    dt = datetime(2013, 10, 12)
    ts = Timestamp(datetime(2013, 10, 13))
    assert (ts - dt).days == 1
    assert (dt - ts).days == -1
Example #13
    def test_constructor_invalid(self):

        # invalid
        pytest.raises(TypeError, Float64Index, 0.)
        pytest.raises(TypeError, Float64Index, ['a', 'b', 0.])
        pytest.raises(TypeError, Float64Index, [Timestamp('20130101')])
Example #14
class Base:
    _offset: Optional[Type[DateOffset]] = None
    d = Timestamp(datetime(2008, 1, 2))

    timezones = [
        None,
        "UTC",
        "Asia/Tokyo",
        "US/Eastern",
        "dateutil/Asia/Tokyo",
        "dateutil/US/Pacific",
    ]

    def _get_offset(self, klass, value=1, normalize=False):
        # create instance from offset class
        if klass is FY5253:
            klass = klass(
                n=value,
                startingMonth=1,
                weekday=1,
                variation="last",
                normalize=normalize,
            )
        elif klass is FY5253Quarter:
            klass = klass(
                n=value,
                startingMonth=1,
                weekday=1,
                qtr_with_extra_week=1,
                variation="last",
                normalize=normalize,
            )
        elif klass is LastWeekOfMonth:
            klass = klass(n=value, weekday=5, normalize=normalize)
        elif klass is WeekOfMonth:
            klass = klass(n=value, week=1, weekday=5, normalize=normalize)
        elif klass is Week:
            klass = klass(n=value, weekday=5, normalize=normalize)
        elif klass is DateOffset:
            klass = klass(days=value, normalize=normalize)
        else:
            klass = klass(value, normalize=normalize)
        return klass

    def test_apply_out_of_range(self, request, tz_naive_fixture):
        tz = tz_naive_fixture
        if self._offset is None:
            return

        # try to create an out-of-bounds result timestamp; if we can't create
        # the offset, skip
        try:
            if self._offset in (BusinessHour, CustomBusinessHour):
                # Using 10000 in BusinessHour fails in tz check because of DST
                # difference
                offset = self._get_offset(self._offset, value=100000)
            else:
                offset = self._get_offset(self._offset, value=10000)

            result = Timestamp("20080101") + offset
            assert isinstance(result, datetime)
            assert result.tzinfo is None

            # Check tz is preserved
            t = Timestamp("20080101", tz=tz)
            result = t + offset
            assert isinstance(result, datetime)

            if isinstance(tz, tzlocal) and not IS64:
                # If we hit OutOfBoundsDatetime on non-64 bit machines
                # we'll drop out of the try clause before the next test
                request.node.add_marker(
                    pytest.mark.xfail(
                        reason="OverflowError inside tzlocal past 2038"))
            assert t.tzinfo == result.tzinfo

        except OutOfBoundsDatetime:
            pass
        except (ValueError, KeyError):
            # we are creating an invalid offset
            # so ignore
            pass

    def test_offsets_compare_equal(self):
        # root cause of GH#456: __ne__ was not implemented
        if self._offset is None:
            return
        offset1 = self._offset()
        offset2 = self._offset()
        assert not offset1 != offset2
        assert offset1 == offset2

    def test_rsub(self):
        if self._offset is None or not hasattr(self, "offset2"):
            # i.e. skip for TestCommon and YQM subclasses that do not have
            # offset2 attr
            return
        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

    def test_radd(self):
        if self._offset is None or not hasattr(self, "offset2"):
            # i.e. skip for TestCommon and YQM subclasses that do not have
            # offset2 attr
            return
        assert self.d + self.offset2 == self.offset2 + self.d

    def test_sub(self):
        if self._offset is None or not hasattr(self, "offset2"):
            # i.e. skip for TestCommon and YQM subclasses that do not have
            # offset2 attr
            return
        off = self.offset2
        msg = "Cannot subtract datetime from offset"
        with pytest.raises(TypeError, match=msg):
            off - self.d

        assert 2 * off - off == off
        assert self.d - self.offset2 == self.d + self._offset(-2)
        assert self.d - self.offset2 == self.d - (2 * off - off)

    def testMult1(self):
        if self._offset is None or not hasattr(self, "offset1"):
            # i.e. skip for TestCommon and YQM subclasses that do not have
            # offset1 attr
            return
        assert self.d + 10 * self.offset1 == self.d + self._offset(10)
        assert self.d + 5 * self.offset1 == self.d + self._offset(5)

    def testMult2(self):
        if self._offset is None:
            return
        assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50)
        assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)

    def test_compare_str(self):
        # GH#23524
        # comparing to strings that cannot be cast to DateOffsets should
        #  not raise for __eq__ or __ne__
        if self._offset is None:
            return
        off = self._get_offset(self._offset)

        assert not off == "infer"
        assert off != "foo"
Example #15
0
def test_select_dtypes(setup_path):

    with ensure_clean_store(setup_path) as store:
        # with a Timestamp data column (GH #2637)
        df = DataFrame(
            {
                "ts": bdate_range("2012-01-01", periods=300),
                "A": np.random.randn(300),
            }
        )
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=["ts", "A"])

        result = store.select("df", "ts>=Timestamp('2012-02-01')")
        expected = df[df.ts >= Timestamp("2012-02-01")]
        tm.assert_frame_equal(expected, result)

        # bool columns (GH #2849)
        df = DataFrame(np.random.randn(5, 2), columns=["A", "B"])
        df["object"] = "foo"
        df.loc[4:5, "object"] = "bar"
        df["boolv"] = df["A"] > 0
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=True)

        expected = df[df.boolv == True].reindex(columns=["A", "boolv"])  # noqa:E712
        for v in [True, "true", 1]:
            result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
            tm.assert_frame_equal(expected, result)

        expected = df[df.boolv == False].reindex(columns=["A", "boolv"])  # noqa:E712
        for v in [False, "false", 0]:
            result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
            tm.assert_frame_equal(expected, result)

        # integer index
        df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)})
        _maybe_remove(store, "df_int")
        store.append("df_int", df)
        result = store.select("df_int", "index<10 and columns=['A']")
        expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
        tm.assert_frame_equal(expected, result)

        # float index
        df = DataFrame(
            {
                "A": np.random.rand(20),
                "B": np.random.rand(20),
                "index": np.arange(20, dtype="f8"),
            }
        )
        _maybe_remove(store, "df_float")
        store.append("df_float", df)
        result = store.select("df_float", "index<10.0 and columns=['A']")
        expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
        tm.assert_frame_equal(expected, result)

    with ensure_clean_store(setup_path) as store:

        # floats w/o NaN
        df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
        df["cols"] = (df["cols"] + 10).apply(str)

        store.append("df1", df, data_columns=True)
        result = store.select("df1", where="values>2.0")
        expected = df[df["values"] > 2.0]
        tm.assert_frame_equal(expected, result)

        # floats with NaN
        df.iloc[0] = np.nan
        expected = df[df["values"] > 2.0]

        store.append("df2", df, data_columns=True, index=False)
        result = store.select("df2", where="values>2.0")
        tm.assert_frame_equal(expected, result)

        # https://github.com/PyTables/PyTables/issues/282
        # bug in selection when 0th row has a np.nan and an index
        # store.append('df3',df,data_columns=True)
        # result = store.select(
        #    'df3', where='values>2.0')
        # tm.assert_frame_equal(expected, result)

        # not in first position float with NaN ok too
        df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
        df["cols"] = (df["cols"] + 10).apply(str)

        df.iloc[1] = np.nan
        expected = df[df["values"] > 2.0]

        store.append("df4", df, data_columns=True)
        result = store.select("df4", where="values>2.0")
        tm.assert_frame_equal(expected, result)

    # test selection with comparison against numpy scalar
    # GH 11283
    with ensure_clean_store(setup_path) as store:
        df = tm.makeDataFrame()

        expected = df[df["A"] > 0]

        store.append("df", df, data_columns=True)
        np_zero = np.float64(0)  # noqa:F841
        result = store.select("df", where=["A>np_zero"])
        tm.assert_frame_equal(expected, result)
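
A minimal sketch of the same table-query pattern through the public to_hdf/read_hdf API, assuming PyTables is installed; 'store.h5' is a hypothetical file name, not part of the test suite.

import numpy as np
import pandas as pd

df = pd.DataFrame({"A": np.random.randn(10),
                   "ts": pd.bdate_range("2012-01-01", periods=10)})
# format="table" plus data_columns=True makes the columns queryable on disk
# (hypothetical file name for illustration).
df.to_hdf("store.h5", key="df", format="table", data_columns=True)

# Only rows satisfying the where clause are read back from the HDF5 file.
subset = pd.read_hdf("store.h5", "df", where="A > 0")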
Example #16
0
class TestWeek(Base):
    _offset = Week
    d = Timestamp(datetime(2008, 1, 2))
    offset1 = _offset()
    offset2 = _offset(2)

    def test_repr(self):
        assert repr(Week(weekday=0)) == "<Week: weekday=0>"
        assert repr(Week(n=-1, weekday=0)) == "<-1 * Week: weekday=0>"
        assert repr(Week(n=-2, weekday=0)) == "<-2 * Weeks: weekday=0>"

    def test_corner(self):
        with pytest.raises(ValueError, match="Day must be"):
            Week(weekday=7)

        with pytest.raises(ValueError, match="Day must be"):
            Week(weekday=-1)

    def test_is_anchored(self):
        assert Week(weekday=0).is_anchored()
        assert not Week().is_anchored()
        assert not Week(2, weekday=2).is_anchored()
        assert not Week(2).is_anchored()

    offset_cases = []
    # not business week
    offset_cases.append((
        Week(),
        {
            datetime(2008, 1, 1): datetime(2008, 1, 8),
            datetime(2008, 1, 4): datetime(2008, 1, 11),
            datetime(2008, 1, 5): datetime(2008, 1, 12),
            datetime(2008, 1, 6): datetime(2008, 1, 13),
            datetime(2008, 1, 7): datetime(2008, 1, 14),
        },
    ))

    # Mon
    offset_cases.append((
        Week(weekday=0),
        {
            datetime(2007, 12, 31): datetime(2008, 1, 7),
            datetime(2008, 1, 4): datetime(2008, 1, 7),
            datetime(2008, 1, 5): datetime(2008, 1, 7),
            datetime(2008, 1, 6): datetime(2008, 1, 7),
            datetime(2008, 1, 7): datetime(2008, 1, 14),
        },
    ))

    # n=0 -> roll forward. Mon
    offset_cases.append((
        Week(0, weekday=0),
        {
            datetime(2007, 12, 31): datetime(2007, 12, 31),
            datetime(2008, 1, 4): datetime(2008, 1, 7),
            datetime(2008, 1, 5): datetime(2008, 1, 7),
            datetime(2008, 1, 6): datetime(2008, 1, 7),
            datetime(2008, 1, 7): datetime(2008, 1, 7),
        },
    ))

    # n=-2, anchored on Tuesday (weekday=1) -> roll back two Tuesdays
    offset_cases.append((
        Week(-2, weekday=1),
        {
            datetime(2010, 4, 6): datetime(2010, 3, 23),
            datetime(2010, 4, 8): datetime(2010, 3, 30),
            datetime(2010, 4, 5): datetime(2010, 3, 23),
        },
    ))

    @pytest.mark.parametrize("case", offset_cases)
    def test_offset(self, case):
        offset, cases = case
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    @pytest.mark.parametrize("weekday", range(7))
    def test_is_on_offset(self, weekday):
        offset = Week(weekday=weekday)

        for day in range(1, 8):
            date = datetime(2008, 1, day)

            if day % 7 == weekday:
                expected = True
            else:
                expected = False
            assert_is_on_offset(offset, date, expected)
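
A minimal sketch of the anchored-Week arithmetic the offset_cases above encode, using standard pandas offset semantics; it is an illustration, not part of the test module.

from datetime import datetime
from pandas import Timestamp
from pandas.tseries.offsets import Week

# weekday=0 anchors on Monday; n=1 moves to the next Monday, while n=0
# rolls forward only if the date is not already a Monday.
assert Timestamp(datetime(2008, 1, 4)) + Week(weekday=0) == Timestamp("2008-01-07")
assert Timestamp(datetime(2008, 1, 7)) + Week(0, weekday=0) == Timestamp("2008-01-07")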
Example #17
0
def _convert_listlike_datetimes(
    arg,
    box,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversion of a 1D listlike
    of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        dates to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime: 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype,
        objects_to_datetime64ns,
    )

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, "values", arg)
        result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
        if box:
            if errors == "ignore":
                from pandas import Index

                result = Index(result, name=name)
            else:
                result = DatetimeIndex(result, name=name)
            # GH 23758: We may still need to localize the result with tz
            # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
            # result will be naive but in UTC
            try:
                result = result.tz_localize("UTC").tz_convert(tz_parsed)
            except AttributeError:
                # Regular Index from 'ignore' path
                return result
            if tz is not None:
                if result.tz is None:
                    result = result.tz_localize(tz)
                else:
                    result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes the process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors
                    )
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name
                        )
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e

    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to a numpy array
            # of datetime objects
            result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result]
            return np.array(result, dtype=object)

    if box:
        utc = tz == "utc"
        return _box_as_indexlike(result, utc=utc, name=name)
    return result
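
A minimal sketch of the public to_datetime entry point this helper backs, using only documented keyword arguments; it is an illustration, not code from the source file.

import pandas as pd

# List-like input with an explicit format is parsed to a DatetimeIndex.
idx = pd.to_datetime(["2019-01-01", "2019-01-02"], format="%Y-%m-%d")

# Numeric input with a unit goes through the `unit is not None` branch.
idx2 = pd.to_datetime([0, 86400], unit="s", utc=True)

# errors="coerce" fills unparseable entries with NaT instead of raising.
idx3 = pd.to_datetime(["2019-01-01", "not a date"], errors="coerce")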
Example #18
0
def test_append_all_nans(setup_path):

    with ensure_clean_store(setup_path) as store:

        df = DataFrame(
            {"A1": np.random.randn(20), "A2": np.random.randn(20)},
            index=np.arange(20),
        )
        df.loc[0:15, :] = np.nan

        # nan some entire rows (dropna=True)
        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df[-4:])

        # nan some entire rows (dropna=False)
        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df)

        # tests the option io.hdf.dropna_table
        pd.set_option("io.hdf.dropna_table", False)
        _maybe_remove(store, "df3")
        store.append("df3", df[:10])
        store.append("df3", df[10:])
        tm.assert_frame_equal(store["df3"], df)

        pd.set_option("io.hdf.dropna_table", True)
        _maybe_remove(store, "df4")
        store.append("df4", df[:10])
        store.append("df4", df[10:])
        tm.assert_frame_equal(store["df4"], df[-4:])

        # nan some entire rows (strings are still written!)
        df = DataFrame(
            {
                "A1": np.random.randn(20),
                "A2": np.random.randn(20),
                "B": "foo",
                "C": "bar",
            },
            index=np.arange(20),
        )

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df)

        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df)

        # nan some entire rows (but since we have dates they are still
        # written!)
        df = DataFrame(
            {
                "A1": np.random.randn(20),
                "A2": np.random.randn(20),
                "B": "foo",
                "C": "bar",
                "D": Timestamp("20010101"),
                "E": datetime.datetime(2001, 1, 2, 0, 0),
            },
            index=np.arange(20),
        )

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df)

        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df)
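
A minimal sketch of the dropna behaviour under test, using the public HDFStore API; 'nan_rows.h5' is a hypothetical file name, and the sketch is not the test itself.

import numpy as np
import pandas as pd

df = pd.DataFrame({"A1": np.random.randn(20), "A2": np.random.randn(20)},
                  index=np.arange(20))
df.loc[0:15, :] = np.nan   # rows 0-15 become entirely NaN

with pd.HDFStore("nan_rows.h5", mode="w") as store:   # hypothetical file name
    # dropna=True skips rows that are entirely NaN when appending ...
    store.append("df_drop", df, dropna=True)
    # ... while dropna=False writes every row.
    store.append("df_keep", df, dropna=False)

assert len(pd.read_hdf("nan_rows.h5", "df_drop")) == 4
assert len(pd.read_hdf("nan_rows.h5", "df_keep")) == 20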
Example #19
0
    def test_with_offset(self):
        expected = Timestamp("2014-07-01 13:00")

        assert self.d + CustomBusinessHour() * 3 == expected
        assert self.d + CustomBusinessHour(n=3) == expected
Example #20
0
def test_append_with_data_columns(setup_path):

    with ensure_clean_store(setup_path) as store:
        df = tm.makeTimeDataFrame()
        df.iloc[0, df.columns.get_loc("B")] = 1.0
        _maybe_remove(store, "df")
        store.append("df", df[:2], data_columns=["B"])
        store.append("df", df[2:])
        tm.assert_frame_equal(store["df"], df)

        # check that we have indices created
        assert store._handle.root.df.table.cols.index.is_indexed is True
        assert store._handle.root.df.table.cols.B.is_indexed is True

        # data column searching
        result = store.select("df", "B>0")
        expected = df[df.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column searching (with an indexable and a data_columns)
        result = store.select("df", "B>0 and index>df.index[3]")
        df_new = df.reindex(index=df.index[4:])
        expected = df_new[df_new.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column selection with a string data_column
        df_new = df.copy()
        df_new["string"] = "foo"
        df_new.loc[df_new.index[1:4], "string"] = np.nan
        df_new.loc[df_new.index[5:6], "string"] = "bar"
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"])
        result = store.select("df", "string='foo'")
        expected = df_new[df_new.string == "foo"]
        tm.assert_frame_equal(result, expected)

        # using min_itemsize and a data column
        def check_col(key, name, size):
            assert (
                getattr(store.get_storer(key).table.description, name).itemsize == size
            )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize={"string": 30})
        check_col("df", "string", 30)
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize=30)
        check_col("df", "string", 30)
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize={"values": 30})
        check_col("df", "string", 30)

    with ensure_clean_store(setup_path) as store:
        df_new["string2"] = "foobarbah"
        df_new["string_block1"] = "foobarbah1"
        df_new["string_block2"] = "foobarbah2"
        _maybe_remove(store, "df")
        store.append(
            "df",
            df_new,
            data_columns=["string", "string2"],
            min_itemsize={"string": 30, "string2": 40, "values": 50},
        )
        check_col("df", "string", 30)
        check_col("df", "string2", 40)
        check_col("df", "values_block_1", 50)

    with ensure_clean_store(setup_path) as store:
        # multiple data columns
        df_new = df.copy()
        df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0
        df_new.iloc[0, df_new.columns.get_loc("B")] = -1.0
        df_new["string"] = "foo"

        sl = df_new.columns.get_loc("string")
        df_new.iloc[1:4, sl] = np.nan
        df_new.iloc[5:6, sl] = "bar"

        df_new["string2"] = "foo"
        sl = df_new.columns.get_loc("string2")
        df_new.iloc[2:5, sl] = np.nan
        df_new.iloc[7:8, sl] = "bar"
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["A", "B", "string", "string2"])
        result = store.select("df", "string='foo' and string2='foo' and A>0 and B<0")
        expected = df_new[
            (df_new.string == "foo")
            & (df_new.string2 == "foo")
            & (df_new.A > 0)
            & (df_new.B < 0)
        ]
        tm.assert_frame_equal(result, expected, check_freq=False)
        # FIXME: 2020-05-07 freq check randomly fails in the CI

        # yield an empty frame
        result = store.select("df", "string='foo' and string2='cool'")
        expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]
        tm.assert_frame_equal(result, expected)

    with ensure_clean_store(setup_path) as store:
        # doc example
        df_dc = df.copy()
        df_dc["string"] = "foo"
        df_dc.loc[df_dc.index[4:6], "string"] = np.nan
        df_dc.loc[df_dc.index[7:9], "string"] = "bar"
        df_dc["string2"] = "cool"
        df_dc["datetime"] = Timestamp("20010102")
        df_dc = df_dc._convert(datetime=True)
        df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan

        _maybe_remove(store, "df_dc")
        store.append(
            "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"]
        )
        result = store.select("df_dc", "B>0")

        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
        tm.assert_frame_equal(result, expected, check_freq=False)
        # FIXME: 2020-12-07 intermittent build failures here with freq of
        #  None instead of BDay(4)

    with ensure_clean_store(setup_path) as store:
        # doc example part 2
        np.random.seed(1234)
        index = date_range("1/1/2000", periods=8)
        df_dc = DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"])
        df_dc["string"] = "foo"
        df_dc.loc[df_dc.index[4:6], "string"] = np.nan
        df_dc.loc[df_dc.index[7:9], "string"] = "bar"
        df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()
        df_dc["string2"] = "cool"

        # on-disk operations
        store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"])

        result = store.select("df_dc", "B>0")
        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"'])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
        tm.assert_frame_equal(result, expected)
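
A minimal sketch of the data_columns / min_itemsize pattern exercised above, assuming PyTables is installed; 'dc.h5' is a hypothetical file name, and the sketch is not the test itself.

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(8, 2), columns=["A", "B"])
df["string"] = "foo"

with pd.HDFStore("dc.h5", mode="w") as store:   # hypothetical file name
    # Only columns listed in data_columns are individually queryable;
    # min_itemsize reserves room so longer strings can be appended later.
    store.append("df", df, data_columns=["B", "string"],
                 min_itemsize={"string": 30})
    result = store.select("df", "B > 0 and string == 'foo'")

expected = df[(df.B > 0) & (df.string == "foo")]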
Example #21
0
def _adjust_to_origin(arg, origin, unit):
    """
    Helper function for to_datetime.
    Adjust input argument to the specified origin

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be adjusted
    origin : 'julian' or Timestamp
        origin offset for the arg
    unit : str
        passed unit from to_datetime, must be 'D'

    Returns
    -------
    ndarray or scalar of adjusted date(s)
    """
    if origin == "julian":
        original = arg
        j0 = Timestamp(0).to_julian_date()
        if unit != "D":
            raise ValueError("unit must be 'D' for origin='julian'")
        try:
            arg = arg - j0
        except TypeError as err:
            raise ValueError(
                "incompatible 'arg' type for given 'origin'='julian'"
            ) from err

        # preemptively check this for a nice range
        j_max = Timestamp.max.to_julian_date() - j0
        j_min = Timestamp.min.to_julian_date() - j0
        if np.any(arg > j_max) or np.any(arg < j_min):
            raise OutOfBoundsDatetime(
                f"{original} is Out of Bounds for origin='julian'"
            )
    else:
        # arg must be numeric
        if not (
            (is_scalar(arg) and (is_integer(arg) or is_float(arg)))
            or is_numeric_dtype(np.asarray(arg))
        ):
            raise ValueError(
                f"'{arg}' is not compatible with origin='{origin}'; "
                "it must be numeric with a unit specified"
            )

        # we are going to offset back to unix / epoch time
        try:
            offset = Timestamp(origin)
        except OutOfBoundsDatetime as err:
            raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err
        except ValueError as err:
            raise ValueError(
                f"origin {origin} cannot be converted to a Timestamp"
            ) from err

        if offset.tz is not None:
            raise ValueError(f"origin offset {offset} must be tz-naive")
        td_offset = offset - Timestamp(0)

        # convert the offset to the unit of the arg
        # this should be lossless in terms of precision
        ioffset = td_offset // Timedelta(1, unit=unit)

        # scalars & ndarray-like can handle the addition
        if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)):
            arg = np.asarray(arg)
        arg = arg + ioffset
    return arg
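
A minimal sketch of the documented origin/unit behaviour this helper implements for to_datetime; the calls below follow the public API and are not taken from the source file.

import pandas as pd

# origin='julian' requires unit='D'; the value is read as a Julian day number.
ts = pd.to_datetime(2456658, origin="julian", unit="D")

# Any epoch can serve as the origin for numeric input.
idx = pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01"))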
Example #22
0
    def test_apply_nanoseconds(self):
        tests = [
            (
                BusinessHour(),
                {
                    Timestamp("2014-07-04 15:00") + Nano(5):
                    Timestamp("2014-07-04 16:00") + Nano(5),
                    Timestamp("2014-07-04 16:00") + Nano(5):
                    Timestamp("2014-07-07 09:00") + Nano(5),
                    Timestamp("2014-07-04 16:00") - Nano(5):
                    Timestamp("2014-07-04 17:00") - Nano(5),
                },
            ),
            (
                BusinessHour(-1),
                {
                    Timestamp("2014-07-04 15:00") + Nano(5):
                    Timestamp("2014-07-04 14:00") + Nano(5),
                    Timestamp("2014-07-04 10:00") + Nano(5):
                    Timestamp("2014-07-04 09:00") + Nano(5),
                    Timestamp("2014-07-04 10:00") - Nano(5):
                    Timestamp("2014-07-03 17:00") - Nano(5),
                },
            ),
        ]

        for offset, cases in tests:
            for base, expected in cases.items():
                assert_offset_equal(offset, base, expected)
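
A minimal sketch of the nanosecond-preserving arithmetic asserted above, using standard pandas offsets; it is an illustration, not part of the test class.

from pandas import Timestamp
from pandas.tseries.offsets import BusinessHour, Nano

start = Timestamp("2014-07-04 15:00") + Nano(5)   # Friday, 5 ns past 15:00
# One business hour forward keeps the nanosecond component intact.
assert start + BusinessHour() == Timestamp("2014-07-04 16:00") + Nano(5)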
Example #23
0
    def convert_value(self, v) -> "TermValue":
        """
        convert the expression that is in the term to something that is
        accepted by pytables
        """
        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded, encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == "datetime64" or kind == "datetime":
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind == "timedelta64" or kind == "timedelta":
            v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            metadata = com.values_from_object(self.metadata)
            result = metadata.searchsorted(v, side="left")

            # result returns 0 if v is first element or if v is not in metadata
            # check that metadata contains v
            if not result and v not in metadata:
                result = -1
            return TermValue(result, result, "integer")
        elif kind == "integer":
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                v = not v.strip().lower() in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(
                f"Cannot compare {v} of type {type(v)} to {kind} column")
Example #24
0
    def convert_value(self, v) -> TermValue:
        """
        convert the expression that is in the term to something that is
        accepted by pytables
        """

        def stringify(value):
            if self.encoding is not None:
                return pprint_thing_encoded(value, encoding=self.encoding)
            return pprint_thing(value)

        kind = ensure_decoded(self.kind)
        meta = ensure_decoded(self.meta)
        if kind == "datetime64" or kind == "datetime":
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind == "timedelta64" or kind == "timedelta":
            if isinstance(v, str):
                v = Timedelta(v).value
            else:
                v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            metadata = extract_array(self.metadata, extract_numpy=True)
            result: npt.NDArray[np.intp] | np.intp | int
            if v not in metadata:
                result = -1
            else:
                result = metadata.searchsorted(v, side="left")
            return TermValue(result, result, "integer")
        elif kind == "integer":
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                v = not v.strip().lower() in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")
Example #25
0
    def setup_method(self, method):
        self.d = Timestamp(datetime(2008, 1, 2))
        _offset_map.clear()
Example #26
0
    def test_arithmetic_overflow(self):
        with pytest.raises(OverflowError):
            Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")

        with pytest.raises(OverflowError):
            Timestamp("1700-01-01") + timedelta(days=13 * 19999)
Example #27
0
    def _check_offsetfunc_works(self,
                                offset,
                                funcname,
                                dt,
                                expected,
                                normalize=False):

        if normalize and issubclass(offset, Tick):
            # normalize=True disallowed for Tick subclasses GH#21427
            return

        offset_s = self._get_offset(offset, normalize=normalize)
        func = getattr(offset_s, funcname)

        result = func(dt)
        assert isinstance(result, Timestamp)
        assert result == expected

        result = func(Timestamp(dt))
        assert isinstance(result, Timestamp)
        assert result == expected

        # see gh-14101
        exp_warning = None
        ts = Timestamp(dt) + Nano(5)

        if (type(offset_s).__name__ == "DateOffset"
                and (funcname in ["apply", "_apply"] or normalize)
                and ts.nanosecond > 0):
            exp_warning = UserWarning

        # test nanosecond is preserved
        with tm.assert_produces_warning(exp_warning):
            result = func(ts)

        if exp_warning is None and funcname == "_apply":
            # GH#44522
            # Check in this particular case to avoid headaches with
            #  testing for multiple warnings produced by the same call.
            with tm.assert_produces_warning(FutureWarning,
                                            match="apply is deprecated"):
                res2 = offset_s.apply(ts)

            assert type(res2) is type(result)
            assert res2 == result

        assert isinstance(result, Timestamp)
        if normalize is False:
            assert result == expected + Nano(5)
        else:
            assert result == expected

        if isinstance(dt, np.datetime64):
            # test tz when input is datetime or Timestamp
            return

        for tz in self.timezones:
            expected_localize = expected.tz_localize(tz)
            tz_obj = timezones.maybe_get_tz(tz)
            dt_tz = conversion.localize_pydatetime(dt, tz_obj)

            result = func(dt_tz)
            assert isinstance(result, Timestamp)
            assert result == expected_localize

            result = func(Timestamp(dt, tz=tz))
            assert isinstance(result, Timestamp)
            assert result == expected_localize

            # see gh-14101
            exp_warning = None
            ts = Timestamp(dt, tz=tz) + Nano(5)

            if (type(offset_s).__name__ == "DateOffset"
                    and (funcname in ["apply", "_apply"] or normalize)
                    and ts.nanosecond > 0):
                exp_warning = UserWarning

            # test nanosecond is preserved
            with tm.assert_produces_warning(exp_warning):
                result = func(ts)
            assert isinstance(result, Timestamp)
            if normalize is False:
                assert result == expected_localize + Nano(5)
            else:
                assert result == expected_localize
Example #28
0
    def test_add(self, arithmatic_offset_type, expected):
        assert DateOffset(**{arithmatic_offset_type: 1}) + self.d == Timestamp(expected)
        assert self.d + DateOffset(**{arithmatic_offset_type: 1}) == Timestamp(expected)
Example #29
0
    def test_rollback(self, offset_types):
        expecteds = {
            "BusinessDay": Timestamp("2010-12-31 09:00:00"),
            "CustomBusinessDay": Timestamp("2010-12-31 09:00:00"),
            "CustomBusinessMonthEnd": Timestamp("2010-12-31 09:00:00"),
            "CustomBusinessMonthBegin": Timestamp("2010-12-01 09:00:00"),
            "BusinessMonthBegin": Timestamp("2010-12-01 09:00:00"),
            "MonthEnd": Timestamp("2010-12-31 09:00:00"),
            "SemiMonthEnd": Timestamp("2010-12-31 09:00:00"),
            "BusinessMonthEnd": Timestamp("2010-12-31 09:00:00"),
            "BYearBegin": Timestamp("2010-01-01 09:00:00"),
            "YearEnd": Timestamp("2010-12-31 09:00:00"),
            "BYearEnd": Timestamp("2010-12-31 09:00:00"),
            "QuarterBegin": Timestamp("2010-12-01 09:00:00"),
            "BQuarterBegin": Timestamp("2010-12-01 09:00:00"),
            "QuarterEnd": Timestamp("2010-12-31 09:00:00"),
            "BQuarterEnd": Timestamp("2010-12-31 09:00:00"),
            "BusinessHour": Timestamp("2010-12-31 17:00:00"),
            "CustomBusinessHour": Timestamp("2010-12-31 17:00:00"),
            "WeekOfMonth": Timestamp("2010-12-11 09:00:00"),
            "LastWeekOfMonth": Timestamp("2010-12-25 09:00:00"),
            "FY5253Quarter": Timestamp("2010-10-26 09:00:00"),
            "FY5253": Timestamp("2010-01-26 09:00:00"),
            "Easter": Timestamp("2010-04-04 09:00:00"),
        }

        # result will not be changed if the target is on the offset
        for n in [
                "Day",
                "MonthBegin",
                "SemiMonthBegin",
                "YearBegin",
                "Week",
                "Hour",
                "Minute",
                "Second",
                "Milli",
                "Micro",
                "Nano",
                "DateOffset",
        ]:
            expecteds[n] = Timestamp("2011/01/01 09:00")

        # but be changed when normalize=True
        norm_expected = expecteds.copy()
        for k in norm_expected:
            norm_expected[k] = Timestamp(norm_expected[k].date())

        normalized = {
            "Day": Timestamp("2010-12-31 00:00:00"),
            "DateOffset": Timestamp("2010-12-31 00:00:00"),
            "MonthBegin": Timestamp("2010-12-01 00:00:00"),
            "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"),
            "YearBegin": Timestamp("2010-01-01 00:00:00"),
            "Week": Timestamp("2010-12-25 00:00:00"),
            "Hour": Timestamp("2011-01-01 00:00:00"),
            "Minute": Timestamp("2011-01-01 00:00:00"),
            "Second": Timestamp("2011-01-01 00:00:00"),
            "Milli": Timestamp("2011-01-01 00:00:00"),
            "Micro": Timestamp("2011-01-01 00:00:00"),
        }
        norm_expected.update(normalized)

        sdt = datetime(2011, 1, 1, 9, 0)
        ndt = np.datetime64("2011-01-01 09:00")

        for dt in [sdt, ndt]:
            expected = expecteds[offset_types.__name__]
            self._check_offsetfunc_works(offset_types, "rollback", dt,
                                         expected)

            expected = norm_expected[offset_types.__name__]
            self._check_offsetfunc_works(offset_types,
                                         "rollback",
                                         dt,
                                         expected,
                                         normalize=True)
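
A minimal sketch of the rollback semantics the expecteds above encode, using standard pandas offsets; it is an illustration, not part of the test class.

from pandas import Timestamp
from pandas.tseries.offsets import Day, MonthEnd

dt = Timestamp("2011-01-01 09:00")
# rollback moves to the previous on-offset date, or returns the input
# unchanged if it is already on the offset (Day treats every date as on-offset).
assert MonthEnd().rollback(dt) == Timestamp("2010-12-31 09:00")
assert Day().rollback(dt) == dt
# With normalize=True the time component is dropped as well.
assert MonthEnd(normalize=True).rollback(dt) == Timestamp("2010-12-31")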
Example #30
0
    def test_sub(self, arithmatic_offset_type, expected):
        assert self.d - DateOffset(**{arithmatic_offset_type: 1}) == Timestamp(expected)
        with pytest.raises(TypeError,
                           match="Cannot subtract datetime from offset"):
            DateOffset(**{arithmatic_offset_type: 1}) - self.d
Example #31
0
    def test_mul_sub(self, arithmatic_offset_type, n, expected):
        assert self.d - DateOffset(**{arithmatic_offset_type: 1}) * n == Timestamp(expected)
        assert self.d - n * DateOffset(**{arithmatic_offset_type: 1}) == Timestamp(expected)
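
A minimal sketch of keyword-based DateOffset arithmetic like that exercised by these parametrized tests; the dates below are illustrative choices, not the parametrize values.

from datetime import datetime
from pandas import Timestamp
from pandas.tseries.offsets import DateOffset

d = Timestamp(datetime(2008, 1, 2))
# Each keyword shifts the corresponding calendar field.
assert d + DateOffset(months=2) == Timestamp("2008-03-02")
assert d - DateOffset(years=1) == Timestamp("2007-01-02")
assert d - 3 * DateOffset(days=1) == Timestamp("2007-12-30")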
Example #32
0
    def test_round_invalid_arg(self):
        stamp = Timestamp("2000-01-05 05:09:15.13")
        with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
            stamp.round("foo")
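
A minimal sketch of the valid-frequency counterpart to the test above, using the public Timestamp.round API.

from pandas import Timestamp

stamp = Timestamp("2000-01-05 05:09:15.13")
assert stamp.round("min") == Timestamp("2000-01-05 05:09:00")
assert stamp.round("H") == Timestamp("2000-01-05 05:00:00")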