Esempio n. 1
0
    def _cmp_method(self, other, op):
        """
        Apply the binary operator ``op`` element-wise to ``self`` and ``other``.

        Positions where either operand is missing are left out of the
        computation and come back as missing in the result.

        Parameters
        ----------
        other : scalar, sequence or StringArray
            Right-hand operand. A ``StringArray`` is unwrapped to its backing
            ``_ndarray``; any other non-scalar is converted with
            ``np.asarray``.
        op : callable
            Binary operator. ``op.__name__`` selects between the arithmetic
            and the logical result path.

        Returns
        -------
        StringArray
            When ``op.__name__`` is listed in ``ops.ARITHMETIC_BINOPS``.
        BooleanArray
            Otherwise, with the missing positions masked.

        Raises
        ------
        ValueError
            If ``other`` is not a scalar and ``len(other) != len(self)``.
        """
        from pandas.arrays import BooleanArray

        # Work on the raw object ndarray when handed another StringArray.
        rhs = other._ndarray if isinstance(other, StringArray) else other

        missing = isna(self) | isna(rhs)
        present = ~missing

        if not lib.is_scalar(rhs):
            if len(rhs) != len(self):
                # prevent improper broadcasting when other is 2D
                raise ValueError(
                    f"Lengths of operands do not match: {len(self)} != {len(rhs)}"
                )

            # Only the entries that take part in the operation are kept.
            rhs = np.asarray(rhs)[present]

        if op.__name__ not in ops.ARITHMETIC_BINOPS:
            # Logical path: start from all-False; the mask handed to
            # BooleanArray marks the missing slots.
            flags = np.zeros(len(self._ndarray), dtype="bool")
            flags[present] = op(self._ndarray[present], rhs)
            return BooleanArray(flags, missing)

        # Arithmetic path: missing slots are filled with the dtype's NA value.
        values = np.empty_like(self._ndarray, dtype="object")
        values[missing] = StringDtype.na_value
        values[present] = op(self._ndarray[present], rhs)
        return StringArray(values)
Esempio n. 2
0
    def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
        """
        Map ``f`` over this array through ``lib.map_infer_mask``.

        Parameters
        ----------
        f : callable
            Function handed to ``lib.map_infer_mask`` together with the
            array's values and its missing-value mask.
        na_value : object, optional
            Fill value forwarded to ``lib.map_infer_mask``. Defaults to
            ``self.dtype.na_value``.
        dtype : Dtype, optional
            Requested result dtype. Defaults to ``StringDtype()``.

        Returns
        -------
        IntegerArray or BooleanArray
            When ``dtype`` is an integer or boolean dtype.
        StringArray
            When ``dtype`` is a string dtype that is not ``object``.
        object
            Otherwise, whatever ``lib.map_infer_mask`` returns.
        """
        from pandas.arrays import (
            BooleanArray,
            IntegerArray,
            StringArray,
        )
        from pandas.core.arrays.string_ import StringDtype

        if dtype is None:
            dtype = StringDtype()
        if na_value is None:
            na_value = self.dtype.na_value

        missing = isna(self)
        values = np.asarray(self)
        missing_u8 = missing.view("uint8")

        wants_integer = is_integer_dtype(dtype)
        if wants_integer or is_bool_dtype(dtype):
            constructor: Union[Type[IntegerArray], Type[BooleanArray]] = (
                IntegerArray if wants_integer else BooleanArray
            )

            # An NA fill cannot be stored in the numpy result buffer; use 1
            # as a placeholder and let the mask mark those slots instead.
            fill_is_na = isna(na_value)
            fill = 1 if fill_is_na else na_value

            # error: Value of type variable "_DTypeScalar" of "dtype" cannot be
            # "object"
            # error: Argument 1 to "dtype" has incompatible type
            # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
            # "Type[object]"
            target = np.dtype(dtype)  # type: ignore[type-var,arg-type]
            mapped = lib.map_infer_mask(
                values,
                f,
                missing_u8,
                convert=False,
                na_value=fill,
                dtype=target,
            )

            if not fill_is_na:
                # A concrete fill value was written, so nothing is missing.
                missing[:] = False

            return constructor(mapped, missing)

        if is_string_dtype(dtype) and not is_object_dtype(dtype):
            # i.e. StringDtype
            mapped = lib.map_infer_mask(
                values, f, missing_u8, convert=False, na_value=na_value
            )
            return StringArray(mapped)

        # Object result. Reached either because the result really is object
        # (e.g. .encode returns bytes, .findall returns a list) or because
        # the result type is unknown (e.g. `.get` can return anything).
        return lib.map_infer_mask(values, f, missing_u8)
Esempio n. 3
0
    def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
        """
        Run ``f`` across the array via ``lib.map_infer_mask``.

        The result container depends on ``dtype``:

        * integer or boolean dtype -> ``IntegerArray`` / ``BooleanArray``
          built from the mapped values and the missing-value mask;
        * string dtype other than ``object`` -> ``StringArray``;
        * anything else -> the raw return value of ``lib.map_infer_mask``.

        Parameters
        ----------
        f : callable
            Function forwarded to ``lib.map_infer_mask``.
        na_value : object, optional
            Fill value; falls back to ``self.dtype.na_value`` when ``None``.
        dtype : Dtype, optional
            Target dtype; falls back to ``StringDtype()`` when ``None``.
        """
        from pandas.arrays import (
            BooleanArray,
            IntegerArray,
            StringArray,
        )
        from pandas.core.arrays.string_ import StringDtype

        if dtype is None:
            dtype = StringDtype()
        if na_value is None:
            na_value = self.dtype.na_value

        na_mask = isna(self)
        data = np.asarray(self)

        as_integer = is_integer_dtype(dtype)
        if as_integer or is_bool_dtype(dtype):
            constructor: Union[Type[IntegerArray], Type[BooleanArray]]
            constructor = IntegerArray if as_integer else BooleanArray

            # With an NA fill, pass the placeholder 1 and keep the mask so the
            # masked array still reports those positions as missing.
            keep_mask = isna(na_value)
            if keep_mask:
                na_value = 1

            call_kwargs = {
                "convert": False,
                "na_value": na_value,
                "dtype": np.dtype(dtype),
            }
            out = lib.map_infer_mask(data, f, na_mask.view("uint8"), **call_kwargs)

            if not keep_mask:
                # The caller supplied a real fill value: clear the mask.
                na_mask[:] = False

            return constructor(out, na_mask)

        if is_string_dtype(dtype) and not is_object_dtype(dtype):
            # i.e. StringDtype
            out = lib.map_infer_mask(
                data,
                f,
                na_mask.view("uint8"),
                convert=False,
                na_value=na_value,
            )
            return StringArray(out)

        # Object-typed result: either it is known to be object (e.g. .encode
        # returns bytes, .findall returns a list) or the type cannot be known
        # in advance (e.g. `.get` can return anything).
        return lib.map_infer_mask(data, f, na_mask.view("uint8"))
Esempio n. 4
0
    def astype(self,
               dtype: Any,
               copy: bool = True,
               casting: str = "unsafe") -> Any:
        """
        Cast the array to ``dtype``.

        Parameters
        ----------
        dtype : Any
            Target dtype. ``RLEDtype`` and ``pd.StringDtype`` instances are
            handled specially; anything else is passed on to numpy.
        copy : bool, default True
            With an ``RLEDtype`` equal to ``self.dtype`` and ``copy=False``,
            ``self`` is returned as is. Otherwise it is forwarded to
            ``np.array``.
        casting : str, default "unsafe"
            Casting rule forwarded to the ``astype`` calls made here.

        Returns
        -------
        RLEArray
            For an ``RLEDtype`` target.
        StringArray
            For a ``pd.StringDtype`` target, built from ``str(x)`` of every
            element.
        numpy.ndarray
            For every other target.
        """
        _logger.debug("RLEArray.astype(dtype=%r, copy=%r)", dtype, copy)

        if isinstance(dtype, RLEDtype):
            # The dtype comparison is only evaluated when no copy is wanted.
            if copy or not (dtype == self.dtype):
                converted = self._data.astype(dtype._dtype, casting=casting)
                return RLEArray(data=converted, positions=self._positions.copy())
            return self

        if isinstance(dtype, pd.StringDtype):
            # TODO: fast-path
            return StringArray._from_sequence(list(map(str, self)))

        if casting == "unsafe":
            return np.array(self, dtype=dtype, copy=copy)

        # np.array() takes no casting rule, so materialise first and cast after.
        dense = np.array(self, copy=copy)
        return dense.astype(dtype=dtype, casting=casting)
Esempio n. 5
0
     pd.CategoricalDtype(None, ordered=True),
     pd.Categorical(["a", "b"], ordered=True),
 ),
 # Interval
 (
     [pd.Interval(1, 2), pd.Interval(3, 4)],
     "interval",
     IntervalArray.from_tuples([(1, 2), (3, 4)]),
 ),
 # Sparse
 ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
 # IntegerNA
 ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
 (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # String
 (["a", None], "string", StringArray._from_sequence(["a", None])),
 (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])),
 # Boolean
 ([True, None], "boolean", BooleanArray._from_sequence([True, None])),
 ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])),
 # Index
 (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # Series[EA] returns the EA
 (
     pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
     None,
     pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
 ),
 # "3rd party" EAs work
 ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])),
 # pass an ExtensionArray, but a different dtype
Esempio n. 6
0
 def peakmem_stringarray_construction(self):
     """Construct a ``StringArray`` from ``self.series_arr`` and discard it."""
     # NOTE(review): the ``peakmem_`` prefix suggests a peak-memory benchmark
     # collected by the benchmark runner -- confirm against the suite setup.
     StringArray(self.series_arr)
Esempio n. 7
0
 def time_string_array_with_nan_construction(self):
     """Construct a ``StringArray`` from ``self.series_arr_nan`` and discard it."""
     # NOTE(review): presumably ``series_arr_nan`` is the fixture containing
     # missing values, and the ``time_`` prefix marks a timing benchmark --
     # confirm against the suite's setup method.
     StringArray(self.series_arr_nan)
Esempio n. 8
0
 def time_string_array_construction(self):
     """Construct a ``StringArray`` from ``self.series_arr`` and discard it."""
     # NOTE(review): the ``time_`` prefix suggests a timing benchmark collected
     # by the benchmark runner -- confirm against the suite setup.
     StringArray(self.series_arr)
Esempio n. 9
0
                na_value = 1
            result = lib.map_infer_mask(
                arr,
                f,
                mask.view("uint8"),
                convert=False,
                na_value=na_value,
                dtype=np.dtype(dtype),
            )

            if not na_value_is_na:
                mask[:] = False

            return constructor(result, mask)

        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
            # i.e. StringDtype
            result = lib.map_infer_mask(
                arr, f, mask.view("uint8"), convert=False, na_value=na_value
            )
            return StringArray(result)
        else:
            # This is when the result type is object. We reach this when
            # -> We know the result type is truly object (e.g. .encode returns bytes
            #    or .findall returns a list).
            # -> We don't know the result type. E.g. `.get` can return anything.
            return lib.map_infer_mask(arr, f, mask.view("uint8"))


# Module-level call, executed when this module is imported.
# NOTE(review): presumably attaches the arithmetic operator methods to
# StringArray -- confirm against the definition of `_add_arithmetic_ops`.
StringArray._add_arithmetic_ops()
Esempio n. 10
0
                f,
                mask.view("uint8"),
                convert=False,
                na_value=na_value,
                dtype=np.dtype(dtype),
            )

            if not na_value_is_na:
                mask[:] = False

            return constructor(result, mask)

        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
            # i.e. StringDtype
            result = lib.map_infer_mask(arr,
                                        f,
                                        mask.view("uint8"),
                                        convert=False,
                                        na_value=na_value)
            return StringArray(result)
        else:
            # This is when the result type is object. We reach this when
            # -> We know the result type is truly object (e.g. .encode returns bytes
            #    or .findall returns a list).
            # -> We don't know the result type. E.g. `.get` can return anything.
            return lib.map_infer_mask(arr, f, mask.view("uint8"))


# Module-level calls, executed when this module is imported.
# NOTE(review): presumably these attach the arithmetic and comparison operator
# methods to StringArray -- confirm against the definitions of
# `_add_arithmetic_ops` / `_add_comparison_ops`.
StringArray._add_arithmetic_ops()
StringArray._add_comparison_ops()