Beispiel #1
0
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)
        if isinstance(dtype, StringDtype):
            if copy:
                return self.copy()
            return self
        elif isinstance(dtype, _IntegerDtype):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype.numpy_dtype)
            return IntegerArray(values, mask, copy=False)
        elif isinstance(dtype, FloatingDtype):
            arr = self.copy()
            mask = self.isna()
            arr[mask] = "0"
            values = arr.astype(dtype.numpy_dtype)
            return FloatingArray(values, mask, copy=False)
        elif np.issubdtype(dtype, np.floating):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype)
            values[mask] = np.nan
            return values

        return super().astype(dtype, copy)
Beispiel #2
0
def test_astype_nansafe():
    # https://github.com/pandas-dev/pandas/pull/22343
    arr = IntegerArray([np.nan, 1, 2], dtype="Int8")

    with tm.assert_raises_regex(ValueError,
                                'cannot convert float NaN to integer'):
        arr.astype('uint32')
Beispiel #3
0
    def test_construct_cast_invalid(self, dtype):

        msg = "cannot safely"
        arr = [1.2, 2.3, 3.7]
        with tm.assert_raises_regex(TypeError, msg):
            IntegerArray(arr, dtype=dtype)

        with tm.assert_raises_regex(TypeError, msg):
            pd.Series(arr).astype(dtype)

        arr = [1.2, 2.3, 3.7, np.nan]
        with tm.assert_raises_regex(TypeError, msg):
            IntegerArray(arr, dtype=dtype)

        with tm.assert_raises_regex(TypeError, msg):
            pd.Series(arr).astype(dtype)
Beispiel #4
0
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, _IntegerDtype):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype.numpy_dtype)
            return IntegerArray(values, mask, copy=False)
        elif isinstance(dtype, FloatingDtype):
            arr = self.copy()
            mask = self.isna()
            arr[mask] = "0"
            values = arr.astype(dtype.numpy_dtype)
            return FloatingArray(values, mask, copy=False)
        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            return cls._from_sequence(self, dtype=dtype, copy=copy)
        elif np.issubdtype(dtype, np.floating):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype)
            values[mask] = np.nan
            return values

        return super().astype(dtype, copy)
Beispiel #5
0
    def _maybe_mask_result(self, result, mask, other, op_name: str):
        """
        Parameters
        ----------
        result : array-like
        mask : array-like bool
        other : scalar or array-like
        op_name : str
        """
        # if we have a float operand we are by-definition
        # a float result
        # or our op is a divide
        if (is_float_dtype(other) or is_float(other)) or (
            op_name in ["rtruediv", "truediv"]
        ):
            result[mask] = np.nan
            return result

        if is_bool_dtype(result):
            return BooleanArray(result, mask, copy=False)

        elif is_integer_dtype(result):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)
        else:
            result[mask] = np.nan
            return result
Beispiel #6
0
        def reconstruct(x):
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.
            from pandas.core.arrays import (
                BooleanArray,
                FloatingArray,
                IntegerArray,
            )

            if is_bool_dtype(x.dtype):
                m = mask.copy()
                return BooleanArray(x, m)
            elif is_integer_dtype(x.dtype):
                m = mask.copy()
                return IntegerArray(x, m)
            elif is_float_dtype(x.dtype):
                m = mask.copy()
                if x.dtype == np.float16:
                    # reached in e.g. np.sqrt on BooleanArray
                    # we don't support float16
                    x = x.astype(np.float32)
                return FloatingArray(x, m)
            else:
                x[mask] = np.nan
            return x
Beispiel #7
0
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, _IntegerDtype):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype.numpy_dtype)
            return IntegerArray(values, mask, copy=False)
        elif isinstance(dtype, FloatingDtype):
            # error: Incompatible types in assignment (expression has type
            # "StringArray", variable has type "ndarray")
            arr = self.copy()  # type: ignore[assignment]
            mask = self.isna()
            arr[mask] = "0"
            values = arr.astype(dtype.numpy_dtype)
            return FloatingArray(values, mask, copy=False)
        elif np.issubdtype(dtype, np.floating):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype)
            values[mask] = np.nan
            return values

        return super().astype(dtype, copy)
Beispiel #8
0
    def astype(self, dtype, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        ndarray or ExtensionArray
            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with an BooleanDtype, equivalent of same_kind
            casting
        """
        from pandas.core.arrays.string_ import StringDtype

        dtype = pandas_dtype(dtype)

        if isinstance(dtype, BooleanDtype):
            values, mask = coerce_to_array(self, copy=copy)
            if not copy:
                return self
            else:
                return BooleanArray(values, mask, copy=False)
        elif isinstance(dtype, StringDtype):
            return dtype.construct_array_type()._from_sequence(self, copy=False)

        if is_bool_dtype(dtype):
            # astype_nansafe converts np.nan to True
            if self._hasna:
                raise ValueError("cannot convert float NaN to bool")
            else:
                return self._data.astype(dtype, copy=copy)
        if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(
                self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
            )
        # for integer, error if there are missing values
        if is_integer_dtype(dtype):
            if self._hasna:
                raise ValueError("cannot convert NA to integer")
        # for float dtype, ensure we use np.nan before casting (numpy cannot
        # deal with pd.NA)
        na_value = self._na_value
        if is_float_dtype(dtype):
            na_value = np.nan
        # coerce
        return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
Beispiel #9
0
def test_integer_array_constructor():
    values = np.array([1, 2, 3, 4], dtype='int64')
    mask = np.array([False, False, False, True], dtype='bool')

    result = IntegerArray(values, mask)
    expected = integer_array([1, 2, 3, np.nan], dtype='int64')
    tm.assert_extension_array_equal(result, expected)

    with pytest.raises(TypeError):
        IntegerArray(values.tolist(), mask)

    with pytest.raises(TypeError):
        IntegerArray(values, mask.tolist())

    with pytest.raises(TypeError):
        IntegerArray(values.astype(float), mask)

    with pytest.raises(TypeError):
        IntegerArray(values)
Beispiel #10
0
    def value_counts(self, dropna: bool = True) -> Series:
        """
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        from pandas import (
            Index,
            Series,
        )
        from pandas.arrays import IntegerArray

        if dropna:
            keys, counts = algos.value_counts_arraylike(self._data,
                                                        dropna=True,
                                                        mask=self._mask)
            res = Series(counts, index=keys)
            res.index = res.index.astype(self.dtype)
            res = res.astype("Int64")
            return res

        # compute counts on the data with no nans
        data = self._data[~self._mask]
        value_counts = Index(data).value_counts()

        index = value_counts.index

        # if we want nans, count the mask
        if dropna:
            counts = value_counts._values
        else:
            counts = np.empty(len(value_counts) + 1, dtype="int64")
            counts[:-1] = value_counts
            counts[-1] = self._mask.sum()

            index = index.insert(len(index), self.dtype.na_value)

        index = index.astype(self.dtype)

        mask = np.zeros(len(counts), dtype="bool")
        counts = IntegerArray(counts, mask)

        return Series(counts, index=index)
Beispiel #11
0
def test_integer_array_constructor():
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    result = IntegerArray(values, mask)
    expected = pd.array([1, 2, 3, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.tolist(), mask)

    with pytest.raises(TypeError, match=msg):
        IntegerArray(values, mask.tolist())

    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.astype(float), mask)
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)
Beispiel #12
0
    def value_counts(self, dropna: bool = True) -> Series:
        """
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        from pandas import (
            Index,
            Series,
        )
        from pandas.arrays import IntegerArray

        # compute counts on the data with no nans
        data = self._data[~self._mask]
        value_counts = Index(data).value_counts()

        # TODO(ExtensionIndex)
        # if we have allow Index to hold an ExtensionArray
        # this is easier
        index = value_counts.index._values.astype(object)

        # if we want nans, count the mask
        if dropna:
            counts = value_counts._values
        else:
            counts = np.empty(len(value_counts) + 1, dtype="int64")
            counts[:-1] = value_counts
            counts[-1] = self._mask.sum()

            index = Index(
                np.concatenate(
                    [index,
                     np.array([self.dtype.na_value], dtype=object)]),
                dtype=object,
            )

        mask = np.zeros(len(counts), dtype="bool")
        counts = IntegerArray(counts, mask)

        return Series(counts, index=index)
Beispiel #13
0
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)
        if isinstance(dtype, StringDtype):
            if copy:
                return self.copy()
            return self
        elif isinstance(dtype, _IntegerDtype):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype.numpy_dtype)
            return IntegerArray(values, mask, copy=False)

        return super().astype(dtype, copy)
Beispiel #14
0
    def _maybe_mask_result(self, result, mask, other, op_name: str):
        """
        Parameters
        ----------
        result : array-like
        mask : array-like bool
        other : scalar or array-like
        op_name : str
        """
        if op_name == "divmod":
            # divmod returns a tuple
            div, mod = result
            return (
                self._maybe_mask_result(div, mask, other, "floordiv"),
                self._maybe_mask_result(mod, mask, other, "mod"),
            )

        # if we have a float operand we are by-definition
        # a float result
        # or our op is a divide
        if ((is_float_dtype(other) or is_float(other))
                or (op_name in ["rtruediv", "truediv"]) or
            (is_float_dtype(self.dtype) and is_numeric_dtype(result.dtype))):
            from pandas.core.arrays import FloatingArray

            return FloatingArray(result, mask, copy=False)

        elif is_bool_dtype(result):
            from pandas.core.arrays import BooleanArray

            return BooleanArray(result, mask, copy=False)

        elif result.dtype == "timedelta64[ns]":
            # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
            from pandas.core.arrays import TimedeltaArray

            if not isinstance(result, TimedeltaArray):
                result = TimedeltaArray._simple_new(result)

            result[mask] = result.dtype.type("NaT")
            return result

        elif is_integer_dtype(result):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)

        else:
            result[mask] = np.nan
            return result
Beispiel #15
0
    def test_construct_index(self, all_data, dropna):
        # ensure that we do not coerce to Float64Index, rather
        # keep as Index

        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        result = pd.Index(IntegerArray(other, dtype=all_data.dtype))
        expected = pd.Index(other, dtype=object)

        self.assert_index_equal(result, expected)
Beispiel #16
0
def test_groupby_mean_included():
    df = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": IntegerArray([1, None, 3], dtype='Int64'),
    })

    result = df.groupby("A").sum()
    # TODO(#22346): preserve Int64 dtype
    expected = pd.DataFrame(
        {
            "B": np.array([1.0, 3.0]),
            "C": np.array([1, 3], dtype="int64")
        },
        index=pd.Index(['a', 'b'], name='A'))
    tm.assert_frame_equal(result, expected)
Beispiel #17
0
        def reconstruct(x):
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.

            if is_integer_dtype(x.dtype):
                from pandas.core.arrays import IntegerArray

                m = mask.copy()
                return IntegerArray(x, m)
            elif is_float_dtype(x.dtype):
                from pandas.core.arrays import FloatingArray

                m = mask.copy()
                return FloatingArray(x, m)
            else:
                x[mask] = np.nan
            return x
Beispiel #18
0
    def astype(self, dtype, copy=True):
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        array : ndarray or ExtensionArray
            NumPy ndarray, BooleanArray or IntergerArray with 'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with an BooleanDtype, equivalent of same_kind
            casting
        """
        dtype = pandas_dtype(dtype)

        if isinstance(dtype, BooleanDtype):
            values, mask = coerce_to_array(self, copy=copy)
            return BooleanArray(values, mask, copy=False)

        if is_bool_dtype(dtype):
            # astype_nansafe converts np.nan to True
            if self.isna().any():
                raise ValueError("cannot convert float NaN to bool")
            else:
                return self._data.astype(dtype, copy=copy)
        if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(self._data.astype(dtype.numpy_dtype),
                                self._mask.copy(),
                                copy=False)
        # coerce
        data = self._coerce_to_ndarray()
        return astype_nansafe(data, dtype, copy=None)
Beispiel #19
0
    def _maybe_mask_result(self, result, mask):
        """
        Parameters
        ----------
        result : array-like or tuple[array-like]
        mask : array-like bool
        """
        if isinstance(result, tuple):
            # i.e. divmod
            div, mod = result
            return (
                self._maybe_mask_result(div, mask),
                self._maybe_mask_result(mod, mask),
            )

        if is_float_dtype(result.dtype):
            from pandas.core.arrays import FloatingArray

            return FloatingArray(result, mask, copy=False)

        elif is_bool_dtype(result.dtype):
            from pandas.core.arrays import BooleanArray

            return BooleanArray(result, mask, copy=False)

        elif result.dtype == "timedelta64[ns]":
            # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
            from pandas.core.arrays import TimedeltaArray

            if not isinstance(result, TimedeltaArray):
                result = TimedeltaArray._simple_new(result)

            result[mask] = result.dtype.type("NaT")
            return result

        elif is_integer_dtype(result.dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)

        else:
            result[mask] = np.nan
            return result
Beispiel #20
0
def test_to_integer_array(values, to_dtype, result_dtype):
    # convert existing arrays to IntegerArrays
    result = to_integer_array(values, dtype=to_dtype)
    expected = IntegerArray(values, dtype=result_dtype())
    tm.assert_extension_array_equal(result, expected)
Beispiel #21
0
def data_missing(dtype):
    return IntegerArray([np.nan, 1], dtype=dtype)
Beispiel #22
0
def data_for_sorting(dtype):
    return IntegerArray([1, 2, 0], dtype=dtype)
Beispiel #23
0
def data(dtype):
    return IntegerArray(make_data(), dtype=dtype)
Beispiel #24
0
def data_missing_for_sorting(dtype):
    return IntegerArray([1, np.nan, 0], dtype=dtype)
Beispiel #25
0
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
        exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object)
        exp = arr.copy()
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

    @pytest.mark.parametrize(
        "exp",
        [
            IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])),
            IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])),
        ],
    )
    def test_maybe_convert_objects_nullable_integer(self, exp):
        # GH27335
        arr = np.array([2, np.NaN], dtype=object)
        result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1)

        tm.assert_extension_array_equal(result, exp)

    def test_mixed_dtypes_remain_object_array(self):
        # GH14956
        array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object)
        result = lib.maybe_convert_objects(array, convert_datetime=1)
        tm.assert_numpy_array_equal(result, array)
Beispiel #26
0
def data_for_grouping(dtype):
    b = 1
    a = 0
    c = 2
    na = np.nan
    return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype)