Beispiel #1
    def astype(self, dtype, copy=True):
        """
        Cast this string array to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalised with ``pandas_dtype``.
        copy : bool, default True
            For a string target dtype, whether to return a copy or ``self``.
            For other extension dtypes it is forwarded to ``_from_sequence``.

        Returns
        -------
        ndarray or ExtensionArray
            ``IntegerArray`` / ``FloatingArray`` for the masked numeric dtypes,
            the target dtype's own array type for other extension dtypes, a
            float ndarray with NaN at missing positions for NumPy float
            dtypes, otherwise whatever the parent ``astype`` returns.
        """
        # Local import keeps this block self-contained.
        from pandas.core.dtypes.base import ExtensionDtype

        dtype = pandas_dtype(dtype)
        if isinstance(dtype, StringDtype):
            if copy:
                return self.copy()
            return self
        elif isinstance(dtype, _IntegerDtype):
            arr = self._ndarray.copy()
            mask = self.isna()
            # Missing slots get a placeholder that the NumPy cast accepts; the
            # mask handed to IntegerArray keeps them marked as missing.
            arr[mask] = 0
            values = arr.astype(dtype.numpy_dtype)
            return IntegerArray(values, mask, copy=False)
        elif isinstance(dtype, FloatingDtype):
            arr = self.copy()
            mask = self.isna()
            # ``arr`` is still a string array here, hence the string placeholder.
            arr[mask] = "0"
            values = arr.astype(dtype.numpy_dtype)
            return FloatingArray(values, mask, copy=False)
        elif isinstance(dtype, ExtensionDtype):
            # Any other extension dtype is not a NumPy dtype and must not reach
            # ``np.issubdtype`` below, which raises TypeError for objects it
            # cannot interpret as a data type.
            cls = dtype.construct_array_type()
            return cls._from_sequence(self, dtype=dtype, copy=copy)
        elif np.issubdtype(dtype, np.floating):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype)
            # Restore missing values as NaN after the cast.
            values[mask] = np.nan
            return values

        return super().astype(dtype, copy)
Beispiel #2
def test_astype_nansafe():
    """Casting an array holding a missing value to a NumPy integer dtype raises."""
    arr = IntegerArray([np.nan, 1, 2], dtype="Int8")

    with tm.assert_raises_regex(ValueError,
                                'cannot convert float NaN to integer'):
        # The ``with`` block had no body (a syntax error). The expected message
        # is about converting NaN to an integer, so cast to a NumPy integer
        # dtype. NOTE(review): the exact target dtype is assumed - confirm.
        arr.astype('uint32')
Beispiel #3
    def test_construct_cast_invalid(self, dtype):
        """Non-integral floats must be rejected rather than truncated.

        Checked with and without a missing value, both through the array
        constructor and through ``Series.astype``.
        """
        msg = "cannot safely"
        arr = [1.2, 2.3, 3.7]
        with tm.assert_raises_regex(TypeError, msg):
            IntegerArray(arr, dtype=dtype)

        # NOTE(review): this ``with`` block had no body (syntax error); the
        # Series cast is the assumed counterpart of the constructor check, and
        # ``pd`` is assumed to be imported by the test module - confirm.
        with tm.assert_raises_regex(TypeError, msg):
            pd.Series(arr).astype(dtype)

        arr = [1.2, 2.3, 3.7, np.nan]
        with tm.assert_raises_regex(TypeError, msg):
            IntegerArray(arr, dtype=dtype)

        with tm.assert_raises_regex(TypeError, msg):
            pd.Series(arr).astype(dtype)
Beispiel #4
    def astype(self, dtype, copy=True):
        """
        Cast this string array to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalised with ``pandas_dtype``.
        copy : bool, default True
            When ``dtype`` equals the current dtype, return a copy (True) or
            ``self`` (False). Also forwarded to ``_from_sequence`` for other
            extension dtypes.

        Returns
        -------
        ndarray or ExtensionArray
        """
        dtype = pandas_dtype(dtype)

        # Same dtype: nothing to convert.
        if is_dtype_equal(dtype, self.dtype):
            return self.copy() if copy else self

        # Masked integer target: cast with a placeholder in the missing slots.
        if isinstance(dtype, _IntegerDtype):
            filled = self._ndarray.copy()
            na_mask = self.isna()
            filled[na_mask] = 0
            return IntegerArray(
                filled.astype(dtype.numpy_dtype), na_mask, copy=False
            )

        # Masked float target: the working copy is still a string array, so the
        # placeholder is the string "0".
        if isinstance(dtype, FloatingDtype):
            filled = self.copy()
            na_mask = self.isna()
            filled[na_mask] = "0"
            return FloatingArray(
                filled.astype(dtype.numpy_dtype), na_mask, copy=False
            )

        # Any other extension dtype builds its own array from this sequence.
        if isinstance(dtype, ExtensionDtype):
            array_type = dtype.construct_array_type()
            return array_type._from_sequence(self, dtype=dtype, copy=copy)

        # NumPy float target: cast, then put NaN back at the missing slots.
        if np.issubdtype(dtype, np.floating):
            filled = self._ndarray.copy()
            na_mask = self.isna()
            filled[na_mask] = 0
            converted = filled.astype(dtype)
            converted[na_mask] = np.nan
            return converted

        return super().astype(dtype, copy)
Beispiel #5
    def _maybe_mask_result(self, result, mask, other, op_name: str):
        result : array-like
        mask : array-like bool
        other : scalar or array-like
        op_name : str
        # if we have a float operand we are by-definition
        # a float result
        # or our op is a divide
        if (is_float_dtype(other) or is_float(other)) or (
            op_name in ["rtruediv", "truediv"]
            result[mask] = np.nan
            return result

        if is_bool_dtype(result):
            return BooleanArray(result, mask, copy=False)

        elif is_integer_dtype(result):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)
            result[mask] = np.nan
            return result
Beispiel #6
        def reconstruct(x):
            """Wrap ufunc output ``x`` in the masked array matching its dtype.

            Boolean, integer and float outputs get a copy of the enclosing
            ``mask``; any other dtype gets NaN written at the masked positions
            and is returned as-is.
            """
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.
            from pandas.core.arrays import (
                BooleanArray,
                FloatingArray,
                IntegerArray,
            )

            if is_bool_dtype(x.dtype):
                m = mask.copy()
                return BooleanArray(x, m)
            elif is_integer_dtype(x.dtype):
                m = mask.copy()
                return IntegerArray(x, m)
            elif is_float_dtype(x.dtype):
                m = mask.copy()
                if x.dtype == np.float16:
                    # reached in e.g. np.sqrt on BooleanArray
                    # we don't support float16
                    x = x.astype(np.float32)
                return FloatingArray(x, m)
            else:
                # Without this ``else`` the fill sat after a ``return`` and was
                # never executed.
                x[mask] = np.nan
            return x
Beispiel #7
    def astype(self, dtype, copy=True):
        """
        Cast this string array to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalised with ``pandas_dtype``.
        copy : bool, default True
            When ``dtype`` equals the current dtype, return a copy (True) or
            ``self`` (False). Also forwarded to ``_from_sequence`` for other
            extension dtypes.

        Returns
        -------
        ndarray or ExtensionArray
        """
        # Local import keeps this block self-contained.
        from pandas.core.dtypes.base import ExtensionDtype

        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        elif isinstance(dtype, _IntegerDtype):
            arr = self._ndarray.copy()
            mask = self.isna()
            # Placeholder so the NumPy cast succeeds; the mask keeps these
            # positions marked as missing.
            arr[mask] = 0
            values = arr.astype(dtype.numpy_dtype)
            return IntegerArray(values, mask, copy=False)
        elif isinstance(dtype, FloatingDtype):
            # error: Incompatible types in assignment (expression has type
            # "StringArray", variable has type "ndarray")
            arr = self.copy()  # type: ignore[assignment]
            mask = self.isna()
            arr[mask] = "0"
            values = arr.astype(dtype.numpy_dtype)
            return FloatingArray(values, mask, copy=False)
        elif isinstance(dtype, ExtensionDtype):
            # Any other extension dtype is not a NumPy dtype and must not reach
            # ``np.issubdtype`` below, which raises TypeError for objects it
            # cannot interpret as a data type.
            cls = dtype.construct_array_type()
            return cls._from_sequence(self, dtype=dtype, copy=copy)
        elif np.issubdtype(dtype, np.floating):
            arr = self._ndarray.copy()
            mask = self.isna()
            arr[mask] = 0
            values = arr.astype(dtype)
            # Restore missing values as NaN after the cast.
            values[mask] = np.nan
            return values

        return super().astype(dtype, copy)
Beispiel #8
    def astype(self, dtype, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        ndarray or ExtensionArray
            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with an BooleanDtype, equivalent of same_kind
            casting
        ValueError
            if missing values are present and 'dtype' is a NumPy bool or
            integer dtype.
        """
        from pandas.core.arrays.string_ import StringDtype

        dtype = pandas_dtype(dtype)

        if isinstance(dtype, BooleanDtype):
            # Same dtype and no copy requested: skip the coercion entirely.
            if not copy:
                return self
            values, mask = coerce_to_array(self, copy=copy)
            return BooleanArray(values, mask, copy=False)
        elif isinstance(dtype, StringDtype):
            return dtype.construct_array_type()._from_sequence(self, copy=False)

        if is_bool_dtype(dtype):
            # astype_nansafe converts np.nan to True
            if self._hasna:
                raise ValueError("cannot convert float NaN to bool")
            else:
                return self._data.astype(dtype, copy=copy)
        if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(
                self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
            )
        # for integer, error if there are missing values
        if is_integer_dtype(dtype):
            if self._hasna:
                raise ValueError("cannot convert NA to integer")
        # for float dtype, ensure we use np.nan before casting (numpy cannot
        # deal with pd.NA)
        na_value = self._na_value
        if is_float_dtype(dtype):
            na_value = np.nan
        # coerce
        return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
Beispiel #9
def test_integer_array_constructor():
    """Check the (values, mask) constructor and its argument validation."""
    values = np.array([1, 2, 3, 4], dtype='int64')
    mask = np.array([False, False, False, True], dtype='bool')

    result = IntegerArray(values, mask)
    expected = integer_array([1, 2, 3, np.nan], dtype='int64')
    tm.assert_extension_array_equal(result, expected)

    # values given as a list instead of an ndarray
    with pytest.raises(TypeError):
        IntegerArray(values.tolist(), mask)

    # mask given as a list instead of an ndarray
    with pytest.raises(TypeError):
        IntegerArray(values, mask.tolist())

    # float values
    with pytest.raises(TypeError):
        IntegerArray(values.astype(float), mask)

    # NOTE(review): this ``with`` block had no body (syntax error); omitting
    # the mask is the assumed remaining case - confirm.
    with pytest.raises(TypeError):
        IntegerArray(values)
Beispiel #10
    def value_counts(self, dropna: bool = True) -> Series:
        """
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        from pandas import (
            Index,
            Series,
        )
        from pandas.arrays import IntegerArray

        if dropna:
            # NOTE(review): the argument list of this call was truncated;
            # ``dropna=True, mask=self._mask`` is assumed - confirm against the
            # signature of ``algos.value_counts_arraylike``.
            keys, counts = algos.value_counts_arraylike(
                self._data, dropna=True, mask=self._mask
            )
            res = Series(counts, index=keys)
            res.index = res.index.astype(self.dtype)
            res = res.astype("Int64")
            return res

        # compute counts on the data with no nans
        data = self._data[~self._mask]
        value_counts = Index(data).value_counts()

        index = value_counts.index

        # ``dropna`` is False from here on (the True case returned above), so
        # always append one extra slot holding the number of masked entries.
        counts = np.empty(len(value_counts) + 1, dtype="int64")
        counts[:-1] = value_counts
        counts[-1] = self._mask.sum()

        index = index.insert(len(index), self.dtype.na_value)

        index = index.astype(self.dtype)

        # The counts themselves are never missing: all-False mask.
        mask = np.zeros(len(counts), dtype="bool")
        counts = IntegerArray(counts, mask)

        return Series(counts, index=index)
Beispiel #11
def test_integer_array_constructor():
    """Check the (values, mask) constructor and its argument validation."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    result = IntegerArray(values, mask)
    expected = pd.array([1, 2, 3, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    # values given as a list instead of an ndarray
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.tolist(), mask)

    # mask given as a list instead of an ndarray
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values, mask.tolist())

    # float values
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.astype(float), mask)

    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    # This ``with`` block had no body (syntax error). The expected message is
    # about a missing ``mask`` argument, so the call must omit it.
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)
Beispiel #12
    def value_counts(self, dropna: bool = True) -> Series:
        """
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        from pandas import (
            Index,
            Series,
        )
        from pandas.arrays import IntegerArray

        # compute counts on the data with no nans
        data = self._data[~self._mask]
        value_counts = Index(data).value_counts()

        # TODO(ExtensionIndex)
        # if we have allow Index to hold an ExtensionArray
        # this is easier
        index = value_counts.index._values.astype(object)

        # if we want nans, count the mask
        if dropna:
            counts = value_counts._values
        else:
            # Without this ``else`` the NA slot was appended even when
            # ``dropna`` is True, and ``counts`` was unbound when it is False.
            counts = np.empty(len(value_counts) + 1, dtype="int64")
            counts[:-1] = value_counts
            counts[-1] = self._mask.sum()

            index = Index(
                np.concatenate(
                    [index, np.array([self.dtype.na_value], dtype=object)]
                ),
                dtype=object,
            )

        # The counts themselves are never missing: all-False mask.
        mask = np.zeros(len(counts), dtype="bool")
        counts = IntegerArray(counts, mask)

        return Series(counts, index=index)
Beispiel #13
    def astype(self, dtype, copy=True):
        """
        Cast this string array to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalised with ``pandas_dtype``.
        copy : bool, default True
            For a string target dtype, return a copy (True) or ``self`` (False).

        Returns
        -------
        ndarray or ExtensionArray
        """
        dtype = pandas_dtype(dtype)

        # String target: nothing to convert.
        if isinstance(dtype, StringDtype):
            return self.copy() if copy else self

        # Masked integer target: cast with a placeholder in the missing slots
        # and let the mask carry the missingness.
        if isinstance(dtype, _IntegerDtype):
            filled = self._ndarray.copy()
            na_mask = self.isna()
            filled[na_mask] = 0
            return IntegerArray(
                filled.astype(dtype.numpy_dtype), na_mask, copy=False
            )

        return super().astype(dtype, copy)
Beispiel #14
    def _maybe_mask_result(self, result, mask, other, op_name: str):
        result : array-like
        mask : array-like bool
        other : scalar or array-like
        op_name : str
        if op_name == "divmod":
            # divmod returns a tuple
            div, mod = result
            return (
                self._maybe_mask_result(div, mask, other, "floordiv"),
                self._maybe_mask_result(mod, mask, other, "mod"),

        # if we have a float operand we are by-definition
        # a float result
        # or our op is a divide
        if ((is_float_dtype(other) or is_float(other))
                or (op_name in ["rtruediv", "truediv"]) or
            (is_float_dtype(self.dtype) and is_numeric_dtype(result.dtype))):
            from pandas.core.arrays import FloatingArray

            return FloatingArray(result, mask, copy=False)

        elif is_bool_dtype(result):
            from pandas.core.arrays import BooleanArray

            return BooleanArray(result, mask, copy=False)

        elif result.dtype == "timedelta64[ns]":
            # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
            from pandas.core.arrays import TimedeltaArray

            if not isinstance(result, TimedeltaArray):
                result = TimedeltaArray._simple_new(result)

            result[mask] = result.dtype.type("NaT")
            return result

        elif is_integer_dtype(result):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)

            result[mask] = np.nan
            return result
Beispiel #15
    def test_construct_index(self, all_data, dropna):
        """An Index built from the integer array matches an object-dtype Index."""
        # ensure that we do not coerce to Float64Index, rather
        # keep as Index

        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            # Without this ``else`` the assignment above was always
            # overwritten, and ``other`` was unbound when ``dropna`` is False.
            other = all_data

        result = pd.Index(IntegerArray(other, dtype=all_data.dtype))
        expected = pd.Index(other, dtype=object)

        self.assert_index_equal(result, expected)
Beispiel #16
def test_groupby_mean_included():
    """Grouped ``sum`` keeps the nullable-integer column in the result."""
    df = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": IntegerArray([1, None, 3], dtype='Int64'),
    })

    result = df.groupby("A").sum()
    # TODO(#22346): preserve Int64 dtype
    expected = pd.DataFrame(
        {
            "B": np.array([1.0, 3.0]),
            "C": np.array([1, 3], dtype="int64")
        },
        index=pd.Index(['a', 'b'], name='A'))
    tm.assert_frame_equal(result, expected)
Beispiel #17
        def reconstruct(x):
            """Wrap ufunc output ``x`` in the masked array matching its dtype.

            Integer and float outputs get a copy of the enclosing ``mask``; any
            other dtype gets NaN written at the masked positions and is
            returned as-is.
            """
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.

            if is_integer_dtype(x.dtype):
                from pandas.core.arrays import IntegerArray

                m = mask.copy()
                return IntegerArray(x, m)
            elif is_float_dtype(x.dtype):
                from pandas.core.arrays import FloatingArray

                m = mask.copy()
                return FloatingArray(x, m)
            else:
                # Without this ``else`` the fill sat after a ``return`` and was
                # never executed, so other dtypes came back unmasked.
                x[mask] = np.nan
            return x
Beispiel #18
    def astype(self, dtype, copy=True):
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        array : ndarray or ExtensionArray
            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with an BooleanDtype, equivalent of same_kind
            casting
        ValueError
            if missing values are present and 'dtype' is a NumPy bool dtype.
        """
        dtype = pandas_dtype(dtype)

        if isinstance(dtype, BooleanDtype):
            values, mask = coerce_to_array(self, copy=copy)
            return BooleanArray(values, mask, copy=False)

        if is_bool_dtype(dtype):
            # astype_nansafe converts np.nan to True
            if self.isna().any():
                raise ValueError("cannot convert float NaN to bool")
            else:
                return self._data.astype(dtype, copy=copy)
        if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
            from pandas.core.arrays import IntegerArray

            # NOTE(review): the tail of this call was truncated; the mask copy
            # and ``copy=False`` are assumed - confirm.
            return IntegerArray(self._data.astype(dtype.numpy_dtype),
                                self._mask.copy(), copy=False)
        # coerce
        data = self._coerce_to_ndarray()
        return astype_nansafe(data, dtype, copy=None)
Beispiel #19
    def _maybe_mask_result(self, result, mask):
        result : array-like or tuple[array-like]
        mask : array-like bool
        if isinstance(result, tuple):
            # i.e. divmod
            div, mod = result
            return (
                self._maybe_mask_result(div, mask),
                self._maybe_mask_result(mod, mask),

        if is_float_dtype(result.dtype):
            from pandas.core.arrays import FloatingArray

            return FloatingArray(result, mask, copy=False)

        elif is_bool_dtype(result.dtype):
            from pandas.core.arrays import BooleanArray

            return BooleanArray(result, mask, copy=False)

        elif result.dtype == "timedelta64[ns]":
            # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
            from pandas.core.arrays import TimedeltaArray

            if not isinstance(result, TimedeltaArray):
                result = TimedeltaArray._simple_new(result)

            result[mask] = result.dtype.type("NaT")
            return result

        elif is_integer_dtype(result.dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)

            result[mask] = np.nan
            return result
Beispiel #20
def test_to_integer_array(values, to_dtype, result_dtype):
    """``to_integer_array`` yields the same array as the direct constructor."""
    # convert existing arrays to IntegerArrays
    converted = to_integer_array(values, dtype=to_dtype)
    reference = IntegerArray(values, dtype=result_dtype())
    tm.assert_extension_array_equal(converted, reference)
Beispiel #21
def data_missing(dtype):
    """Build a two-element array of ``dtype``: a missing value, then 1."""
    values = [np.nan, 1]
    return IntegerArray(values, dtype=dtype)
Beispiel #22
def data_for_sorting(dtype):
    """Build a three-element array of ``dtype`` holding 1, 2, 0 in that order."""
    values = [1, 2, 0]
    return IntegerArray(values, dtype=dtype)
Beispiel #23
def data(dtype):
    """Build an array of ``dtype`` from the values produced by ``make_data``."""
    values = make_data()
    return IntegerArray(values, dtype=dtype)
Beispiel #24
def data_missing_for_sorting(dtype):
    """Build a three-element array of ``dtype``: 1, a missing value, 0."""
    values = [1, np.nan, 0]
    return IntegerArray(values, dtype=dtype)
Beispiel #25
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
        exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object)
        exp = arr.copy()
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

            IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])),
            IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])),
    def test_maybe_convert_objects_nullable_integer(self, exp):
        """An object array of an int and NaN converts to the nullable integer ``exp``."""
        # GH27335
        # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
        arr = np.array([2, np.nan], dtype=object)
        result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1)

        tm.assert_extension_array_equal(result, exp)

    def test_mixed_dtypes_remain_object_array(self):
        """A tz-aware datetime mixed with an int is returned unchanged as object."""
        # GH14956
        aware_stamp = datetime(2015, 1, 1, tzinfo=pytz.utc)
        mixed = np.array([aware_stamp, 1], dtype=object)
        converted = lib.maybe_convert_objects(mixed, convert_datetime=1)
        tm.assert_numpy_array_equal(converted, mixed)
Beispiel #26
def data_for_grouping(dtype):
    """Build the eight-element grouping fixture ``[b, b, NA, NA, a, a, b, c]``.

    Here a=0, b=1, c=2 and NA is ``np.nan``.
    """
    a, b, c = 0, 1, 2
    na = np.nan
    values = [b, b, na, na, a, a, b, c]
    return IntegerArray(values, dtype=dtype)