Example #1
0
    def test_take_fill_valid(self, timedelta_index):
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        td1 = pd.Timedelta(days=1)
        result = arr.take([-1, 1], allow_fill=True, fill_value=td1)
        assert result[0] == td1

        now = Timestamp.now()
        value = now
        msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            # fill_value Timestamp invalid
            arr.take([0, 1], allow_fill=True, fill_value=value)

        value = now.to_period("D")
        with pytest.raises(TypeError, match=msg):
            # fill_value Period invalid
            arr.take([0, 1], allow_fill=True, fill_value=value)

        value = np.datetime64("NaT", "ns")
        with pytest.raises(TypeError, match=msg):
            # require appropriate-dtype if we have a NA value
            arr.take([-1, 1], allow_fill=True, fill_value=value)
Example #2
0
class SharedTests:
    index_cls: Type[Union[DatetimeIndex, PeriodIndex, TimedeltaIndex]]

    @pytest.fixture
    def arr1d(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")
        return arr

    def test_compare_len1_raises(self, arr1d):
        # make sure we raise when comparing with different lengths, specific
        #  to the case where one has length-1, which numpy would broadcast
        arr = arr1d
        idx = self.index_cls(arr)

        with pytest.raises(ValueError, match="Lengths must match"):
            arr == arr[:1]

        # test the index classes while we're at it, GH#23078
        with pytest.raises(ValueError, match="Lengths must match"):
            idx <= idx[[0]]

    @pytest.mark.parametrize(
        "result",
        [
            pd.date_range("2020", periods=3),
            pd.date_range("2020", periods=3, tz="UTC"),
            pd.timedelta_range("0 days", periods=3),
            pd.period_range("2020Q1", periods=3, freq="Q"),
        ],
    )
    def test_compare_with_Categorical(self, result):
        expected = pd.Categorical(result)
        assert all(result == expected)
        assert not any(result != expected)

    @pytest.mark.parametrize("reverse", [True, False])
    @pytest.mark.parametrize("as_index", [True, False])
    def test_compare_categorical_dtype(self, arr1d, as_index, reverse,
                                       ordered):
        other = pd.Categorical(arr1d, ordered=ordered)
        if as_index:
            other = pd.CategoricalIndex(other)

        left, right = arr1d, other
        if reverse:
            left, right = right, left

        ones = np.ones(arr1d.shape, dtype=bool)
        zeros = ~ones

        result = left == right
        tm.assert_numpy_array_equal(result, ones)

        result = left != right
        tm.assert_numpy_array_equal(result, zeros)

        if not reverse and not as_index:
            # Otherwise Categorical raises TypeError bc it is not ordered
            # TODO: we should probably get the same behavior regardless?
            result = left < right
            tm.assert_numpy_array_equal(result, zeros)

            result = left <= right
            tm.assert_numpy_array_equal(result, ones)

            result = left > right
            tm.assert_numpy_array_equal(result, zeros)

            result = left >= right
            tm.assert_numpy_array_equal(result, ones)

    def test_take(self):
        data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9
        np.random.shuffle(data)

        freq = None if self.array_cls is not PeriodArray else "D"

        arr = self.array_cls(data, freq=freq)
        idx = self.index_cls._simple_new(arr)

        takers = [1, 4, 94]
        result = arr.take(takers)
        expected = idx.take(takers)

        tm.assert_index_equal(self.index_cls(result), expected)

        takers = np.array([1, 4, 94])
        result = arr.take(takers)
        expected = idx.take(takers)

        tm.assert_index_equal(self.index_cls(result), expected)

    @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp.now().time])
    def test_take_fill_raises(self, fill_value):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9

        arr = self.array_cls(data, freq="D")

        msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            arr.take([0, 1], allow_fill=True, fill_value=fill_value)

    def test_take_fill(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9

        arr = self.array_cls(data, freq="D")

        result = arr.take([-1, 1], allow_fill=True, fill_value=None)
        assert result[0] is NaT

        result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan)
        assert result[0] is NaT

        result = arr.take([-1, 1], allow_fill=True, fill_value=NaT)
        assert result[0] is NaT

    def test_take_fill_str(self, arr1d):
        # Cast str fill_value matching other fill_value-taking methods
        result = arr1d.take([-1, 1],
                            allow_fill=True,
                            fill_value=str(arr1d[-1]))
        expected = arr1d[[-1, 1]]
        tm.assert_equal(result, expected)

        msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            arr1d.take([-1, 1], allow_fill=True, fill_value="foo")

    def test_concat_same_type(self, arr1d):
        arr = arr1d
        idx = self.index_cls(arr)
        idx = idx.insert(0, NaT)
        arr = self.array_cls(idx)

        result = arr._concat_same_type([arr[:-1], arr[1:], arr])
        arr2 = arr.astype(object)
        expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]),
                                  None)

        tm.assert_index_equal(self.index_cls(result), expected)

    def test_unbox_scalar(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")
        result = arr._unbox_scalar(arr[0])
        expected = arr._data.dtype.type
        assert isinstance(result, expected)

        result = arr._unbox_scalar(NaT)
        assert isinstance(result, expected)

        msg = f"'value' should be a {self.scalar_type.__name__}."
        with pytest.raises(ValueError, match=msg):
            arr._unbox_scalar("foo")

    def test_check_compatible_with(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")

        arr._check_compatible_with(arr[0])
        arr._check_compatible_with(arr[:1])
        arr._check_compatible_with(NaT)

    def test_scalar_from_string(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")
        result = arr._scalar_from_string(str(arr[0]))
        assert result == arr[0]

    def test_reduce_invalid(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")

        msg = f"'{type(arr).__name__}' does not implement reduction 'not a method'"
        with pytest.raises(TypeError, match=msg):
            arr._reduce("not a method")

    @pytest.mark.parametrize("method", ["pad", "backfill"])
    def test_fillna_method_doesnt_change_orig(self, method):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")
        arr[4] = NaT

        fill_value = arr[3] if method == "pad" else arr[5]

        result = arr.fillna(method=method)
        assert result[4] == fill_value

        # check that the original was not changed
        assert arr[4] is NaT

    def test_searchsorted(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")

        # scalar
        result = arr.searchsorted(arr[1])
        assert result == 1

        result = arr.searchsorted(arr[2], side="right")
        assert result == 3

        # own-type
        result = arr.searchsorted(arr[1:3])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        result = arr.searchsorted(arr[1:3], side="right")
        expected = np.array([2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # GH#29884 match numpy convention on whether NaT goes
        #  at the end or the beginning
        result = arr.searchsorted(NaT)
        if np_version_under1p18:
            # Following numpy convention, NaT goes at the beginning
            #  (unlike NaN which goes at the end)
            assert result == 0
        else:
            assert result == 10

    @pytest.mark.parametrize("box", [None, "index", "series"])
    def test_searchsorted_castable_strings(self, arr1d, box, request):
        if isinstance(arr1d, DatetimeArray):
            tz = arr1d.tz
            ts1, ts2 = arr1d[1:3]
            if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2):
                # If we have e.g. tzutc(), when we cast to string and parse
                #  back we get pytz.UTC, and then consider them different timezones
                #  so incorrectly raise.
                mark = pytest.mark.xfail(
                    reason="timezone comparisons inconsistent")
                request.node.add_marker(mark)

        arr = arr1d
        if box is None:
            pass
        elif box == "index":
            # Test the equivalent Index.searchsorted method while we're here
            arr = self.index_cls(arr)
        else:
            # Test the equivalent Series.searchsorted method while we're here
            arr = pd.Series(arr)

        # scalar
        result = arr.searchsorted(str(arr[1]))
        assert result == 1

        result = arr.searchsorted(str(arr[2]), side="right")
        assert result == 3

        result = arr.searchsorted([str(x) for x in arr[1:3]])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(
                TypeError,
                match=re.escape(
                    f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
                    "or array of those. Got 'str' instead."),
        ):
            arr.searchsorted("foo")

        with pytest.raises(
                TypeError,
                match=re.escape(
                    f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
                    "or array of those. Got 'StringArray' instead."),
        ):
            arr.searchsorted([str(arr[1]), "baz"])

    def test_getitem_near_implementation_bounds(self):
        # We only check tz-naive for DTA bc the bounds are slightly different
        #  for other tzs
        i8vals = np.asarray([NaT.value + n for n in range(1, 5)], dtype="i8")
        arr = self.array_cls(i8vals, freq="ns")
        arr[0]  # should not raise OutOfBoundsDatetime

        index = pd.Index(arr)
        index[0]  # should not raise OutOfBoundsDatetime

        ser = pd.Series(arr)
        ser[0]  # should not raise OutOfBoundsDatetime

    def test_getitem_2d(self, arr1d):
        # 2d slicing on a 1D array
        expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype)
        result = arr1d[:, np.newaxis]
        tm.assert_equal(result, expected)

        # Lookup on a 2D array
        arr2d = expected
        expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype)
        result = arr2d[:3, 0]
        tm.assert_equal(result, expected)

        # Scalar lookup
        result = arr2d[-1, 0]
        expected = arr1d[-1]
        assert result == expected

    def test_iter_2d(self, arr1d):
        data2d = arr1d._data[:3, np.newaxis]
        arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)
        result = list(arr2d)
        assert len(result) == 3
        for x in result:
            assert isinstance(x, type(arr1d))
            assert x.ndim == 1
            assert x.dtype == arr1d.dtype

    def test_repr_2d(self, arr1d):
        data2d = arr1d._data[:3, np.newaxis]
        arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)

        result = repr(arr2d)

        if isinstance(arr2d, TimedeltaArray):
            expected = (f"<{type(arr2d).__name__}>\n"
                        "[\n"
                        f"['{arr1d[0]._repr_base()}'],\n"
                        f"['{arr1d[1]._repr_base()}'],\n"
                        f"['{arr1d[2]._repr_base()}']\n"
                        "]\n"
                        f"Shape: (3, 1), dtype: {arr1d.dtype}")
        else:
            expected = (f"<{type(arr2d).__name__}>\n"
                        "[\n"
                        f"['{arr1d[0]}'],\n"
                        f"['{arr1d[1]}'],\n"
                        f"['{arr1d[2]}']\n"
                        "]\n"
                        f"Shape: (3, 1), dtype: {arr1d.dtype}")

        assert result == expected

    def test_setitem(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")

        arr[0] = arr[1]
        expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        expected[0] = expected[1]

        tm.assert_numpy_array_equal(arr.asi8, expected)

        arr[:2] = arr[-2:]
        expected[:2] = expected[-2:]
        tm.assert_numpy_array_equal(arr.asi8, expected)

    @pytest.mark.parametrize(
        "box",
        [
            pd.Index,
            pd.Series,
            np.array,
            list,
            PandasArray,
        ],
    )
    def test_setitem_object_dtype(self, box, arr1d):

        expected = arr1d.copy()[::-1]
        if expected.dtype.kind in ["m", "M"]:
            expected = expected._with_freq(None)

        vals = expected
        if box is list:
            vals = list(vals)
        elif box is np.array:
            # if we do np.array(x).astype(object) then dt64 and td64 cast to ints
            vals = np.array(vals.astype(object))
        elif box is PandasArray:
            vals = box(np.asarray(vals, dtype=object))
        else:
            vals = box(vals).astype(object)

        arr1d[:] = vals

        tm.assert_equal(arr1d, expected)

    def test_setitem_strs(self, arr1d, request):
        # Check that we parse strs in both scalar and listlike
        if isinstance(arr1d, DatetimeArray):
            tz = arr1d.tz
            ts1, ts2 = arr1d[-2:]
            if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2):
                # If we have e.g. tzutc(), when we cast to string and parse
                #  back we get pytz.UTC, and then consider them different timezones
                #  so incorrectly raise.
                mark = pytest.mark.xfail(
                    reason="timezone comparisons inconsistent")
                request.node.add_marker(mark)

        # Setting list-like of strs
        expected = arr1d.copy()
        expected[[0, 1]] = arr1d[-2:]

        result = arr1d.copy()
        result[:2] = [str(x) for x in arr1d[-2:]]
        tm.assert_equal(result, expected)

        # Same thing but now for just a scalar str
        expected = arr1d.copy()
        expected[0] = arr1d[-1]

        result = arr1d.copy()
        result[0] = str(arr1d[-1])
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("as_index", [True, False])
    def test_setitem_categorical(self, arr1d, as_index):
        expected = arr1d.copy()[::-1]
        if not isinstance(expected, PeriodArray):
            expected = expected._with_freq(None)

        cat = pd.Categorical(arr1d)
        if as_index:
            cat = pd.CategoricalIndex(cat)

        arr1d[:] = cat[::-1]

        tm.assert_equal(arr1d, expected)

    def test_setitem_raises(self):
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")
        val = arr[0]

        with pytest.raises(IndexError, match="index 12 is out of bounds"):
            arr[12] = val

        with pytest.raises(TypeError, match="value should be a.* 'object'"):
            arr[0] = object()

        msg = "cannot set using a list-like indexer with a different length"
        with pytest.raises(ValueError, match=msg):
            # GH#36339
            arr[[]] = [arr[1]]

        msg = "cannot set using a slice indexer with a different length than"
        with pytest.raises(ValueError, match=msg):
            # GH#36339
            arr[1:1] = arr[:3]

    @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
    def test_setitem_numeric_raises(self, arr1d, box):
        # We dont case e.g. int64 to our own dtype for setitem

        msg = (f"value should be a '{arr1d._scalar_type.__name__}', "
               "'NaT', or array of those. Got")
        with pytest.raises(TypeError, match=msg):
            arr1d[:2] = box([0, 1])

        with pytest.raises(TypeError, match=msg):
            arr1d[:2] = box([0.0, 1.0])

    def test_inplace_arithmetic(self):
        # GH#24115 check that iadd and isub are actually in-place
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")

        expected = arr + pd.Timedelta(days=1)
        arr += pd.Timedelta(days=1)
        tm.assert_equal(arr, expected)

        expected = arr - pd.Timedelta(days=1)
        arr -= pd.Timedelta(days=1)
        tm.assert_equal(arr, expected)

    def test_shift_fill_int_deprecated(self):
        # GH#31971
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = self.array_cls(data, freq="D")

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = arr.shift(1, fill_value=1)

        expected = arr.copy()
        if self.array_cls is PeriodArray:
            fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq)
        else:
            fill_val = arr._scalar_type(1)
        expected[0] = fill_val
        expected[1:] = arr[:-1]
        tm.assert_equal(result, expected)

    def test_median(self, arr1d):
        arr = arr1d
        if len(arr) % 2 == 0:
            # make it easier to define `expected`
            arr = arr[:-1]

        expected = arr[len(arr) // 2]

        result = arr.median()
        assert type(result) is type(expected)
        assert result == expected

        arr[len(arr) // 2] = NaT
        if not isinstance(expected, Period):
            expected = arr[len(arr) // 2 - 1:len(arr) // 2 + 2].mean()

        assert arr.median(skipna=False) is NaT

        result = arr.median()
        assert type(result) is type(expected)
        assert result == expected

        assert arr[:0].median() is NaT
        assert arr[:0].median(skipna=False) is NaT

        # 2d Case
        arr2 = arr.reshape(-1, 1)

        result = arr2.median(axis=None)
        assert type(result) is type(expected)
        assert result == expected

        assert arr2.median(axis=None, skipna=False) is NaT

        result = arr2.median(axis=0)
        expected2 = type(arr)._from_sequence([expected], dtype=arr.dtype)
        tm.assert_equal(result, expected2)

        result = arr2.median(axis=0, skipna=False)
        expected2 = type(arr)._from_sequence([NaT], dtype=arr.dtype)
        tm.assert_equal(result, expected2)

        result = arr2.median(axis=1)
        tm.assert_equal(result, arr)

        result = arr2.median(axis=1, skipna=False)
        tm.assert_equal(result, arr)

    def test_from_integer_array(self):
        arr = np.array([1, 2, 3], dtype=np.int64)
        expected = self.array_cls(arr, dtype=self.example_dtype)

        data = pd.array(arr, dtype="Int64")
        result = self.array_cls(data, dtype=self.example_dtype)

        tm.assert_extension_array_equal(result, expected)