def map_f(values, f):
    # Apply `f` only where the NA mask is 0; NA slots are passed through untouched.
    return lib.map_infer_mask(values, f, isna(values).view(np.uint8))
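# A minimal usage sketch (illustration only, not part of the pandas snippets
# here): pandas._libs.lib is a private module, so the exact signature of
# map_infer_mask may differ across versions. With no na_value given, masked
# slots are passed through unchanged and `f` runs only on unmasked elements.
import numpy as np
from pandas import isna
from pandas._libs import lib

values = np.array(["a", None, "c"], dtype=object)
mask = isna(values).view(np.uint8)
out = lib.map_infer_mask(values, lambda x: x.upper(), mask)
print(out)  # -> ['A' None 'C'] as an object ndarray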
def _convert_to_ndarrays(
    self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None
):
    result = {}
    for c, values in dct.items():
        conv_f = None if converters is None else converters.get(c, None)
        if isinstance(dtypes, dict):
            cast_type = dtypes.get(c, None)
        else:
            # single dtype or None
            cast_type = dtypes

        if self.na_filter:
            col_na_values, col_na_fvalues = _get_na_values(
                c, na_values, na_fvalues, self.keep_default_na
            )
        else:
            col_na_values, col_na_fvalues = set(), set()

        if conv_f is not None:
            # conv_f applied to data before inference
            if cast_type is not None:
                warnings.warn(
                    (
                        "Both a converter and dtype were specified "
                        f"for column {c} - only the converter will be used"
                    ),
                    ParserWarning,
                    stacklevel=7,
                )

            try:
                values = lib.map_infer(values, conv_f)
            except ValueError:
                # error: Argument 2 to "isin" has incompatible type "List[Any]";
                # expected "Union[Union[ExtensionArray, ndarray], Index, Series]"
                mask = algorithms.isin(
                    values, list(na_values)  # type: ignore[arg-type]
                ).view(np.uint8)
                values = lib.map_infer_mask(values, conv_f, mask)

            cvals, na_count = self._infer_types(
                values, set(col_na_values) | col_na_fvalues, try_num_bool=False
            )
        else:
            is_ea = is_extension_array_dtype(cast_type)
            is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
            # skip inference if specified dtype is object
            # or casting to an EA
            try_num_bool = not (cast_type and is_str_or_ea_dtype)

            # general type inference and conversion
            cvals, na_count = self._infer_types(
                values, set(col_na_values) | col_na_fvalues, try_num_bool
            )

            # type specified in dtype param or cast_type is an EA
            if cast_type and (
                not is_dtype_equal(cvals, cast_type)
                or is_extension_array_dtype(cast_type)
            ):
                if not is_ea and na_count > 0:
                    try:
                        if is_bool_dtype(cast_type):
                            raise ValueError(
                                f"Bool column has NA values in column {c}"
                            )
                    except (AttributeError, TypeError):
                        # invalid input to is_bool_dtype
                        pass
                cast_type = pandas_dtype(cast_type)
                cvals = self._cast_types(cvals, cast_type, c)

        result[c] = cvals
        if verbose and na_count:
            print(f"Filled {na_count} NA values in column {c!s}")
    return result
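# Hedged sketch of the converter-vs-dtype precedence implemented above: when
# read_csv (here forced onto the Python engine, which uses _convert_to_ndarrays)
# is given both a converter and a dtype for the same column, a ParserWarning is
# emitted and only the converter is applied. The column name "a" and the CSV
# payload are made up for illustration.
import io

import pandas as pd

csv = io.StringIO("a\n1\n2\n")
df = pd.read_csv(
    csv,
    engine="python",
    converters={"a": lambda x: int(x) * 10},  # wins over dtype, with a warning
    dtype={"a": "int64"},
)
print(df["a"].tolist())  # -> [10, 20]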
def _map_values(self, mapper, na_action=None):
    """
    An internal function that maps values using the input
    correspondence (which can be a dict, Series, or function).

    Parameters
    ----------
    mapper : function, dict, or Series
        The input correspondence object
    na_action : {None, 'ignore'}
        If 'ignore', propagate NA values, without passing them to the
        mapping function

    Returns
    -------
    Union[Index, MultiIndex], inferred
        The output of the mapping function applied to the index.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.
    """
    # we can fastpath dict/Series to an efficient map
    # as we know that we are not going to have to yield
    # python types
    if is_dict_like(mapper):
        if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
            # If a dictionary subclass defines a default value method,
            # convert mapper to a lookup function (GH #15999).
            dict_with_default = mapper
            mapper = lambda x: dict_with_default[x]
        else:
            # Dictionary does not have a default. Thus it's safe to
            # convert to a Series for efficiency.
            # we specify the keys here to handle the
            # possibility that they are tuples

            # The return value of mapping with an empty mapper is
            # expected to be pd.Series(np.nan, ...). As np.nan is
            # of dtype float64 the return value of this method should
            # be float64 as well
            mapper = create_series_with_explicit_dtype(
                mapper, dtype_if_empty=np.float64
            )

    if isinstance(mapper, ABCSeries):
        # Since values were input this means we came from either
        # a dict or a series and mapper should be an index
        if is_categorical_dtype(self.dtype):
            # use the built in categorical series mapper which saves
            # time by mapping the categories instead of all values

            # pandas\core\base.py:893: error: Incompatible types in
            # assignment (expression has type "Categorical", variable has
            # type "IndexOpsMixin")  [assignment]
            self = cast("Categorical", self)  # type: ignore[assignment]
            # pandas\core\base.py:894: error: Item "ExtensionArray" of
            # "Union[ExtensionArray, Any]" has no attribute "map"
            # [union-attr]
            return self._values.map(mapper)  # type: ignore[union-attr]

        values = self._values

        indexer = mapper.index.get_indexer(values)
        new_values = algorithms.take_1d(mapper._values, indexer)

        return new_values

    # we must convert to python types
    if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
        # GH#23179 some EAs do not have `map`
        values = self._values
        if na_action is not None:
            raise NotImplementedError
        map_f = lambda values, f: values.map(f)
    else:
        # pandas\core\base.py:1142: error: "IndexOpsMixin" has no attribute
        # "astype"  [attr-defined]
        values = self.astype(object)._values  # type: ignore[attr-defined]
        if na_action == "ignore":
            map_f = lambda values, f: lib.map_infer_mask(
                values, f, isna(values).view(np.uint8)
            )
        elif na_action is None:
            map_f = lib.map_infer
        else:
            msg = (
                "na_action must either be 'ignore' or None, "
                f"{na_action} was passed"
            )
            raise ValueError(msg)

    # mapper is a function
    new_values = map_f(values, mapper)

    return new_values
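# Usage sketch of the na_action="ignore" branch above: Series.map on
# object-dtype data routes through _map_values, and NA entries are propagated
# without ever being passed to the mapping function.
import numpy as np
import pandas as pd

s = pd.Series(["cat", np.nan, "dog"])
# len() is never called on the NaN slot; the result is float64 because of the NaN.
print(s.map(len, na_action="ignore"))  # -> [3.0, NaN, 3.0]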
def _str_map(
    self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
):
    # TODO: de-duplicate with StringArray method. This method is more or less
    # copy and paste.

    from pandas.arrays import (
        BooleanArray,
        IntegerArray,
    )

    if dtype is None:
        dtype = self.dtype
    if na_value is None:
        na_value = self.dtype.na_value

    mask = isna(self)
    arr = np.asarray(self)

    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        constructor: type[IntegerArray] | type[BooleanArray]
        if is_integer_dtype(dtype):
            constructor = IntegerArray
        else:
            constructor = BooleanArray

        na_value_is_na = isna(na_value)
        if na_value_is_na:
            na_value = 1
        result = lib.map_infer_mask(
            arr,
            f,
            mask.view("uint8"),
            convert=False,
            na_value=na_value,
            # error: Value of type variable "_DTypeScalar" of "dtype" cannot be
            # "object"
            # error: Argument 1 to "dtype" has incompatible type
            # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
            # "Type[object]"
            dtype=np.dtype(dtype),  # type: ignore[type-var,arg-type]
        )

        if not na_value_is_na:
            mask[:] = False

        return constructor(result, mask)

    elif is_string_dtype(dtype) and not is_object_dtype(dtype):
        # i.e. StringDtype
        result = lib.map_infer_mask(
            arr, f, mask.view("uint8"), convert=False, na_value=na_value
        )
        result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
        return type(self)(result)
    else:
        # This is when the result type is object. We reach this when
        # -> We know the result type is truly object (e.g. .encode returns bytes
        #    or .findall returns a list).
        # -> We don't know the result type. E.g. `.get` can return anything.
        return lib.map_infer_mask(arr, f, mask.view("uint8"))
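# Hedged usage sketch (assumes pyarrow is installed and a pandas version that
# supports the "string[pyarrow]" dtype): string accessor methods on the
# Arrow-backed string array dispatch through _str_map above, so an integer
# result comes back as a masked IntegerArray and NA positions stay NA.
import pandas as pd

s = pd.Series(["ab", None, "abcd"], dtype="string[pyarrow]")
print(s.str.len())  # -> [2, <NA>, 4] with Int64 dtype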
def _str_map(
    self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
):
    """
    Map a callable over valid elements of the array.

    Parameters
    ----------
    f : Callable
        A function to call on each non-NA element.
    na_value : Scalar, optional
        The value to set for NA values. Might also be used for the
        fill value if the callable `f` raises an exception.
        This defaults to ``self._str_na_value`` which is ``np.nan``
        for object-dtype and Categorical and ``pd.NA`` for StringArray.
    dtype : Dtype, optional
        The dtype of the result array.
    convert : bool, default True
        Whether to call `maybe_convert_objects` on the resulting ndarray
    """
    if dtype is None:
        dtype = np.dtype("object")
    if na_value is None:
        na_value = self._str_na_value

    if not len(self):
        # error: Argument 1 to "ndarray" has incompatible type "int";
        # expected "Sequence[int]"
        return np.ndarray(0, dtype=dtype)  # type: ignore[arg-type]

    arr = np.asarray(self, dtype=object)
    mask = isna(arr)
    map_convert = convert and not np.all(mask)
    try:
        result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert)
    except (TypeError, AttributeError) as e:
        # Reraise the exception if callable `f` got wrong number of args.
        # The user may want to be warned by this, instead of getting NaN
        p_err = (
            r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
            r"(?(3)required )positional arguments?"
        )

        if len(e.args) >= 1 and re.search(p_err, e.args[0]):
            # FIXME: this should be totally avoidable
            raise e

        def g(x):
            # This type of fallback behavior can be removed once
            # we remove object-dtype .str accessor.
            try:
                return f(x)
            except (TypeError, AttributeError):
                return na_value

        return self._str_map(g, na_value=na_value, dtype=dtype)
    if not isinstance(result, np.ndarray):
        return result
    if na_value is not np.nan:
        np.putmask(result, mask, na_value)
    if convert and result.dtype == object:
        result = lib.maybe_convert_objects(result)
    return result
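# Sketch of the per-element fallback above on object dtype: elements for which
# the callable raises TypeError/AttributeError are replaced with the NA value
# via the wrapper g, instead of failing the whole operation.
import pandas as pd

s = pd.Series(["a", 1, "b"])  # object dtype with a non-string element
print(s.str.upper())          # -> ["A", NaN, "B"]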