Exemple #1
0
    def apply_standard(self) -> FrameOrSeriesUnion:
        """
        Apply ``self.f`` to ``self.obj`` and wrap the result.

        A NumPy ufunc is called once on the whole object.  Any other
        callable is mapped over the values: through the extension array's
        own ``map`` when it has one, otherwise through ``lib.map_infer`` on
        an object-dtype copy.  NumPy floating-point warnings are silenced
        while the function runs.

        Returns
        -------
        Series or DataFrame
            The expanded-dimension container when the first mapped element
            is a Series, otherwise the same container type as ``self.obj``
            built on the original index.
        """
        f = self.f
        obj = self.obj

        with np.errstate(all="ignore"):
            if isinstance(f, np.ufunc):
                return f(obj)

            # row-wise access
            if is_extension_array_dtype(obj.dtype) and hasattr(
                    obj._values, "map"):
                # GH#23179 some EAs do not have `map`
                mapped = obj._values.map(f)
            else:
                values = obj.astype(object)._values
                mapped = lib.map_infer(values, f, convert=self.convert_dtype)

        if len(mapped) and isinstance(mapped[0], ABCSeries):
            # GH#43986 Need to do list(mapped) in order to get treated as nested
            #  data; an array wrapper around the Series objects is not expanded.
            #  See also GH#25959 regarding EA support
            return obj._constructor_expanddim(list(mapped), index=obj.index)

        return obj._constructor(mapped, index=obj.index).__finalize__(
            obj, method="apply")
Exemple #2
0
    def apply_standard(self) -> DataFrame | Series:
        """
        Evaluate ``self.f`` against ``self.obj`` and box the outcome.

        A NumPy ufunc receives the whole object in a single call.  Every
        other callable is mapped element by element, using the extension
        array's ``map`` when available and ``lib.map_infer`` on an
        object-dtype copy otherwise.  NumPy floating-point warnings are
        suppressed for the duration of the call.

        Returns
        -------
        DataFrame or Series
            Expanded-dimension container if the first mapped element is a
            Series; otherwise ``self.obj``'s own container type, finalized
            from ``self.obj``.
        """
        func = self.f
        ser = self.obj

        with np.errstate(all="ignore"):
            if isinstance(func, np.ufunc):
                return func(ser)

            # row-wise access
            has_ea_map = is_extension_array_dtype(ser.dtype) and hasattr(
                ser._values, "map")
            if has_ea_map:
                # GH#23179 some EAs do not have `map`
                mapped = ser._values.map(func)
            else:
                # error: Argument 2 to "map_infer" has incompatible type
                # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
                # Dict[Hashable, Union[Union[Callable[..., Any], str],
                # List[Union[Callable[..., Any], str]]]]]"; expected
                # "Callable[[Any], Any]"
                mapped = lib.map_infer(
                    ser.astype(object)._values,
                    func,  # type: ignore[arg-type]
                    convert=self.convert_dtype,
                )

        expand = len(mapped) and isinstance(mapped[0], ABCSeries)
        if not expand:
            result = ser._constructor(mapped, index=ser.index)
            return result.__finalize__(ser, method="apply")

        # GH#43986 Need to do list(mapped) in order to get treated as nested
        #  See also GH#25959 regarding EA support
        return ser._constructor_expanddim(list(mapped), index=ser.index)
    def apply_standard(self) -> FrameOrSeriesUnion:
        """
        Apply ``self.f`` to ``self.obj`` and wrap the result.

        A NumPy ufunc is called once on the whole object.  Any other
        callable is mapped over the values: through the extension array's
        own ``map`` when it has one, otherwise through ``lib.map_infer`` on
        an object-dtype copy.  NumPy floating-point warnings are silenced
        while the function runs.

        Returns
        -------
        Series or DataFrame
            The expanded-dimension container when the first mapped element
            is a Series, otherwise the same container type as ``self.obj``
            built on the original index.
        """
        f = self.f
        obj = self.obj

        with np.errstate(all="ignore"):
            if isinstance(f, np.ufunc):
                return f(obj)

            # row-wise access
            if is_extension_array_dtype(obj.dtype) and hasattr(obj._values, "map"):
                # GH#23179 some EAs do not have `map`
                mapped = obj._values.map(f)
            else:
                values = obj.astype(object)._values
                # error: Argument 2 to "map_infer" has incompatible type
                # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
                # Dict[Hashable, Union[Union[Callable[..., Any], str],
                # List[Union[Callable[..., Any], str]]]]]"; expected
                # "Callable[[Any], Any]"
                mapped = lib.map_infer(
                    values,
                    f,  # type: ignore[arg-type]
                    convert=self.convert_dtype,
                )

        if len(mapped) and isinstance(mapped[0], ABCSeries):
            # GH#43986 Need to do list(mapped) in order to get treated as nested
            #  data; an array wrapper around the Series objects is not expanded.
            #  See also GH#25959 regarding EA support
            return obj._constructor_expanddim(list(mapped), index=obj.index)

        return obj._constructor(mapped, index=obj.index).__finalize__(
            obj, method="apply"
        )
Exemple #4
0
    def apply_standard(self) -> FrameOrSeriesUnion:
        """
        Apply ``self.f`` to ``self.obj`` and wrap the result.

        A NumPy ufunc is called once on the whole object.  Any other
        callable is mapped over the values: through the extension array's
        own ``map`` when it has one, otherwise through ``lib.map_infer`` on
        an object-dtype copy.  NumPy floating-point warnings are silenced
        while the function runs.

        Returns
        -------
        Series or DataFrame
            The expanded-dimension container when the first mapped element
            is a Series, otherwise the same container type as ``self.obj``
            built on the original index.
        """
        f = self.f
        obj = self.obj

        with np.errstate(all="ignore"):
            if isinstance(f, np.ufunc):
                # error: Argument 1 to "__call__" of "ufunc" has incompatible type
                # "Series"; expected "Union[Union[int, float, complex, str, bytes,
                # generic], Sequence[Union[int, float, complex, str, bytes, generic]],
                # Sequence[Sequence[Any]], _SupportsArray]"
                return f(obj)  # type: ignore[arg-type]

            # row-wise access
            if is_extension_array_dtype(obj.dtype) and hasattr(obj._values, "map"):
                # GH#23179 some EAs do not have `map`
                mapped = obj._values.map(f)
            else:
                values = obj.astype(object)._values
                mapped = lib.map_infer(values, f, convert=self.convert_dtype)

        if len(mapped) and isinstance(mapped[0], ABCSeries):
            # GH#43986 Need to do list(mapped) in order to get treated as nested
            #  data; an array wrapper around the Series objects is not expanded.
            #  See also GH#25959 regarding EA support
            return obj._constructor_expanddim(list(mapped), index=obj.index)

        return obj._constructor(mapped, index=obj.index).__finalize__(
            obj, method="apply"
        )
Exemple #5
0
    def apply_standard(self) -> DataFrame | Series:
        """
        Run ``self.f`` over ``self.obj`` and rebuild a pandas container.

        A NumPy ufunc is invoked once with the whole object.  Other
        callables are applied per element, via the extension array's
        ``map`` when it offers one and via ``lib.map_infer`` on an
        object-dtype copy otherwise.  NumPy floating-point warnings are
        ignored while the function executes.

        Returns
        -------
        DataFrame or Series
            Expanded-dimension container if the first mapped element is a
            Series; otherwise ``self.obj``'s own container type, finalized
            from ``self.obj``.
        """
        # caller is responsible for ensuring that f is Callable
        func = cast(Callable, self.f)
        target = self.obj

        with np.errstate(all="ignore"):
            if isinstance(func, np.ufunc):
                return func(target)

            # row-wise access
            if not (
                is_extension_array_dtype(target.dtype)
                and hasattr(target._values, "map")
            ):
                as_objects = target.astype(object)._values
                mapped = lib.map_infer(
                    as_objects, func, convert=self.convert_dtype
                )
            else:
                # GH#23179 some EAs do not have `map`
                mapped = target._values.map(func)

        if not len(mapped) or not isinstance(mapped[0], ABCSeries):
            rebuilt = target._constructor(mapped, index=target.index)
            return rebuilt.__finalize__(target, method="apply")

        # GH#43986 Need to do list(mapped) in order to get treated as nested
        #  See also GH#25959 regarding EA support
        return target._constructor_expanddim(list(mapped), index=target.index)
Exemple #6
0
    def _sub_func(c):
        """
        Map every value of column ``c`` to a score looked up in ``fa_df``.

        Only the ``Bins``/``Score`` rows of ``fa_df`` whose ``Var`` equals
        ``c.name`` are considered.  A value matches a row when it lies inside
        the row's bin (for ``pd.Interval`` bins) or equals the bin (for any
        other bin).  The score of the first matching row is used; values
        without a usable match score 0.

        Returns
        -------
        pd.Series
            The per-value scores, named ``c.name``.
        """
        # (bin, score) rows for this column
        _var_score = np.array(fa_df[['Bins', 'Score']][fa_df.Var == c.name])

        def _get_score(v):
            """Return the score of the first bin matching ``v``, else 0."""
            try:
                matches = [
                    row for row in _var_score
                    if (v in row[0] if isinstance(row[0], pd.Interval)
                        else v == row[0])
                ]
                return matches[0][1]
            except Exception:
                # Best-effort lookup: no match (IndexError) or a failed
                # comparison falls back to 0.  Unlike a bare ``except``,
                # KeyboardInterrupt / SystemExit still propagate.
                return 0

        return pd.Series(lib.map_infer(c.astype(object).values, _get_score),
                         name=c.name)
Exemple #7
0
    def _str_get_dummies(self, sep="|"):
        from pandas import Series

        arr = Series(self).fillna("")
        try:
            arr = sep + arr + sep
        except TypeError:
            arr = sep + arr.astype(str) + sep

        tags: set[str] = set()
        for ts in Series(arr).str.split(sep):
            tags.update(ts)
        tags2 = sorted(tags - {""})

        dummies = np.empty((len(arr), len(tags2)), dtype=np.int64)

        for i, t in enumerate(tags2):
            pat = sep + t + sep
            dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x)
        return dummies, tags2
Exemple #8
0
 def _box_values(self, values):
     """
     apply box func to passed values
     """
     return lib.map_infer(values, self._box_func)
Exemple #9
0
    def _convert_to_ndarrays(
        self,
        dct: Mapping,
        na_values,
        na_fvalues,
        verbose: bool = False,
        converters=None,
        dtypes=None,
    ):
        """
        Convert every column in ``dct`` and collect the results in a dict.

        For each column one of three paths is taken: columns listed in
        ``self._parse_date_cols`` only get their NA values replaced by NaN;
        columns with a converter have it applied before type inference;
        all remaining columns go through type inference and, when a dtype
        was requested, a cast to that dtype.

        Parameters
        ----------
        dct : Mapping
            Column label -> values to convert.
        na_values, na_fvalues
            Forwarded to ``_get_na_values`` to obtain the per-column NA
            markers when ``self.na_filter`` is set.  ``na_values`` is also
            used to build the mask in the converter fallback path.
        verbose : bool, default False
            Print the number of filled NA values for each column where that
            number is non-zero.
        converters : optional
            Object with a ``get`` method mapping a column label to a
            callable; ``None`` disables converters.
        dtypes : optional
            Either a dict of per-column dtypes or a single dtype (or None)
            used for every column.

        Returns
        -------
        dict
            Column label -> converted values.

        Raises
        ------
        ValueError
            If a bool dtype is requested for a non-extension-array cast and
            the column contains NA values.

        Warns
        -----
        ParserWarning
            If both a converter and a dtype are given for the same column.
        """
        result = {}
        for c, values in dct.items():
            conv_f = None if converters is None else converters.get(c, None)
            if isinstance(dtypes, dict):
                cast_type = dtypes.get(c, None)
            else:
                # single dtype or None
                cast_type = dtypes

            if self.na_filter:
                col_na_values, col_na_fvalues = _get_na_values(
                    c, na_values, na_fvalues, self.keep_default_na)
            else:
                # NA filtering disabled: nothing is treated as missing
                col_na_values, col_na_fvalues = set(), set()

            if c in self._parse_date_cols:
                # GH#26203 Do not convert columns which get converted to dates
                # but replace nans to ensure to_datetime works
                mask = algorithms.isin(values,
                                       set(col_na_values) | col_na_fvalues)
                np.putmask(values, mask, np.nan)
                result[c] = values
                continue

            if conv_f is not None:
                # conv_f applied to data before inference
                if cast_type is not None:
                    warnings.warn(
                        ("Both a converter and dtype were specified "
                         f"for column {c} - only the converter will be used."),
                        ParserWarning,
                        stacklevel=find_stack_level(),
                    )

                try:
                    values = lib.map_infer(values, conv_f)
                except ValueError:
                    # Fallback when the converter raised ValueError: retry
                    # with a mask of the NA entries (presumably so that
                    # map_infer_mask skips them - confirm against lib).
                    # NOTE(review): the mask is built from the full
                    # ``na_values`` argument, not ``col_na_values`` - confirm
                    # this is intended.
                    # error: Argument 2 to "isin" has incompatible type "List[Any]";
                    # expected "Union[Union[ExtensionArray, ndarray], Index, Series]"
                    mask = algorithms.isin(
                        values,
                        list(na_values)  # type: ignore[arg-type]
                    ).view(np.uint8)
                    values = lib.map_infer_mask(values, conv_f, mask)

                cvals, na_count = self._infer_types(values,
                                                    set(col_na_values)
                                                    | col_na_fvalues,
                                                    try_num_bool=False)
            else:
                is_ea = is_extension_array_dtype(cast_type)
                is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
                # skip inference if specified dtype is object
                # or casting to an EA
                try_num_bool = not (cast_type and is_str_or_ea_dtype)

                # general type inference and conversion
                cvals, na_count = self._infer_types(
                    values,
                    set(col_na_values) | col_na_fvalues, try_num_bool)

                # type specified in dtype param or cast_type is an EA
                if cast_type and (not is_dtype_equal(cvals, cast_type)
                                  or is_extension_array_dtype(cast_type)):
                    if not is_ea and na_count > 0:
                        try:
                            if is_bool_dtype(cast_type):
                                # ValueError is not in the except clause
                                # below, so it propagates to the caller
                                raise ValueError(
                                    f"Bool column has NA values in column {c}")
                        except (AttributeError, TypeError):
                            # invalid input to is_bool_dtype
                            pass
                    cast_type = pandas_dtype(cast_type)
                    cvals = self._cast_types(cvals, cast_type, c)

            result[c] = cvals
            if verbose and na_count:
                print(f"Filled {na_count} NA values in column {c!s}")
        return result
Exemple #10
0
 def infer(x):
     """
     Map each element of ``x`` through the lookup stored at ``dictx[x.name]``.

     Elements are looked up with ``.get``, so keys that are absent map to
     ``None``.  An empty ``x`` is passed to ``lib.map_infer`` as is; a
     non-empty ``x`` is first converted to an object-dtype array.
     """
     lookup = dictx[x.name]
     source = x if x.empty else x.astype(object).values
     return lib.map_infer(source, lambda key: lookup.get(key))
Exemple #11
0
 def _box_values(self, values):
     """
     apply box func to passed values
     """
     return lib.map_infer(values, self._box_func)
Exemple #12
0
    def _convert_to_ndarrays(self,
                             dct,
                             na_values,
                             na_fvalues,
                             verbose=False,
                             converters=None,
                             dtypes=None):
        """
        Convert every column in ``dct`` and collect the results in a dict.

        Columns with a converter have it applied before type inference;
        all other columns go through type inference and, when a dtype was
        requested, a cast to that dtype.

        Parameters
        ----------
        dct
            Object with ``items()`` yielding (column label, values) pairs.
        na_values, na_fvalues
            Forwarded to ``_get_na_values`` to obtain the per-column NA
            markers when ``self.na_filter`` is set.  ``na_values`` is also
            used to build the mask in the converter fallback path.
        verbose : bool, default False
            Print the number of filled NA values for each column where that
            number is non-zero.
        converters : optional
            Object with a ``get`` method mapping a column label to a
            callable; ``None`` disables converters.
        dtypes : optional
            Either a dict of per-column dtypes or a single dtype (or None)
            used for every column.

        Returns
        -------
        dict
            Column label -> converted values.

        Raises
        ------
        ValueError
            If a bool dtype is requested for a non-extension-array cast and
            the column contains NA values.

        Warns
        -----
        ParserWarning
            If both a converter and a dtype are given for the same column.
        """
        result = {}
        for c, values in dct.items():
            conv_f = None if converters is None else converters.get(c, None)
            if isinstance(dtypes, dict):
                cast_type = dtypes.get(c, None)
            else:
                # single dtype or None
                cast_type = dtypes

            if self.na_filter:
                col_na_values, col_na_fvalues = _get_na_values(
                    c, na_values, na_fvalues, self.keep_default_na)
            else:
                # NA filtering disabled: nothing is treated as missing
                col_na_values, col_na_fvalues = set(), set()

            if conv_f is not None:
                # conv_f applied to data before inference
                if cast_type is not None:
                    # NOTE(review): stacklevel is hard-coded; presumably tuned
                    # to the call depth of the public reader - confirm.
                    warnings.warn(
                        ("Both a converter and dtype were specified "
                         f"for column {c} - only the converter will be used"),
                        ParserWarning,
                        stacklevel=7,
                    )

                try:
                    values = lib.map_infer(values, conv_f)
                except ValueError:
                    # Fallback when the converter raised ValueError: retry
                    # with a mask of the NA entries (presumably so that
                    # map_infer_mask skips them - confirm against lib).
                    # NOTE(review): the mask is built from the full
                    # ``na_values`` argument, not ``col_na_values`` - confirm
                    # this is intended.
                    mask = algorithms.isin(values,
                                           list(na_values)).view(np.uint8)
                    values = lib.map_infer_mask(values, conv_f, mask)

                cvals, na_count = self._infer_types(values,
                                                    set(col_na_values)
                                                    | col_na_fvalues,
                                                    try_num_bool=False)
            else:
                is_ea = is_extension_array_dtype(cast_type)
                is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
                # skip inference if specified dtype is object
                # or casting to an EA
                try_num_bool = not (cast_type and is_str_or_ea_dtype)

                # general type inference and conversion
                cvals, na_count = self._infer_types(
                    values,
                    set(col_na_values) | col_na_fvalues, try_num_bool)

                # type specified in dtype param or cast_type is an EA
                if cast_type and (not is_dtype_equal(cvals, cast_type)
                                  or is_extension_array_dtype(cast_type)):
                    if not is_ea and na_count > 0:
                        try:
                            if is_bool_dtype(cast_type):
                                # ValueError is not in the except clause
                                # below, so it propagates to the caller
                                raise ValueError(
                                    f"Bool column has NA values in column {c}")
                        except (AttributeError, TypeError):
                            # invalid input to is_bool_dtype
                            pass
                    cast_type = pandas_dtype(cast_type)
                    cvals = self._cast_types(cvals, cast_type, c)

            result[c] = cvals
            if verbose and na_count:
                print(f"Filled {na_count} NA values in column {c!s}")
        return result