def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype): checker = dtype_cls._checker inferred_type = None if dtype is None and hasattr(values, "dtype"): if checker(values.dtype): dtype = values.dtype if dtype is not None: dtype = dtype_cls._standardize_dtype(dtype) cls = dtype_cls.construct_array_type() if isinstance(values, cls): values, mask = values._data, values._mask if dtype is not None: values = values.astype(dtype.numpy_dtype, copy=False) if copy: values = values.copy() mask = mask.copy() return values, mask, dtype, inferred_type values = np.array(values, copy=copy) inferred_type = None if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": pass elif inferred_type == "boolean": name = dtype_cls.__name__.strip("_") raise TypeError(f"{values.dtype} cannot be converted to {name}") elif is_bool_dtype(values) and checker(dtype): values = np.array(values, dtype=default_dtype, copy=copy) elif not (is_integer_dtype(values) or is_float_dtype(values)): name = dtype_cls.__name__.strip("_") raise TypeError(f"{values.dtype} cannot be converted to {name}") if values.ndim != 1: raise TypeError("values must be a 1D list-like") if mask is None: mask = libmissing.is_numeric_na(values) else: assert len(mask) == len(values) if mask.ndim != 1: raise TypeError("mask must be a 1D list-like") # infer dtype if needed if dtype is None: dtype = default_dtype else: dtype = dtype.type # we copy as need to coerce here if mask.any(): values = values.copy() values[mask] = cls._internal_fill_value if inferred_type in ("string", "unicode"): # casts from str are always safe since they raise # a ValueError if the str cannot be parsed into a float values = values.astype(dtype, copy=copy) else: values = dtype_cls._safe_cast(values, dtype, copy=False) return values, mask, dtype, inferred_type
def coerce_to_array(values, dtype=None, mask=None, copy: bool = False) -> tuple[np.ndarray, np.ndarray]: """ Coerce the input values array to numpy arrays with a mask. Parameters ---------- values : 1D list-like dtype : float dtype mask : bool 1D array, optional copy : bool, default False if True, copy the input Returns ------- tuple of (values, mask) """ # if values is floating numpy array, preserve its dtype if dtype is None and hasattr(values, "dtype"): if is_float_dtype(values.dtype): dtype = values.dtype if dtype is not None: if isinstance(dtype, str) and dtype.startswith("Float"): # Avoid DeprecationWarning from NumPy about np.dtype("Float64") # https://github.com/numpy/numpy/pull/7476 dtype = dtype.lower() if not issubclass(type(dtype), FloatingDtype): try: dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))] except KeyError as err: raise ValueError(f"invalid dtype specified {dtype}") from err if isinstance(values, FloatingArray): values, mask = values._data, values._mask if dtype is not None: values = values.astype(dtype.numpy_dtype, copy=False) if copy: values = values.copy() mask = mask.copy() return values, mask values = np.array(values, copy=copy) if is_object_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": pass elif inferred_type not in [ "floating", "integer", "mixed-integer", "integer-na", "mixed-integer-float", ]: raise TypeError( f"{values.dtype} cannot be converted to a FloatingDtype") elif is_bool_dtype(values) and is_float_dtype(dtype): values = np.array(values, dtype=float, copy=copy) elif not (is_integer_dtype(values) or is_float_dtype(values)): raise TypeError( f"{values.dtype} cannot be converted to a FloatingDtype") if values.ndim != 1: raise TypeError("values must be a 1D list-like") if mask is None: mask = libmissing.is_numeric_na(values) else: assert len(mask) == len(values) if not mask.ndim == 1: raise TypeError("mask must be a 1D list-like") # infer dtype if needed if dtype is None: dtype = np.dtype("float64") else: dtype = dtype.type # if we are float, let's make sure that we can # safely cast # we copy as need to coerce here # TODO should this be a safe cast? if mask.any(): values = values.copy() values[mask] = np.nan values = values.astype(dtype, copy=False) # , casting="safe") return values, mask
def coerce_to_array( values, dtype, mask=None, copy: bool = False ) -> tuple[np.ndarray, np.ndarray]: """ Coerce the input values array to numpy arrays with a mask. Parameters ---------- values : 1D list-like dtype : integer dtype mask : bool 1D array, optional copy : bool, default False if True, copy the input Returns ------- tuple of (values, mask) """ # if values is integer numpy array, preserve its dtype if dtype is None and hasattr(values, "dtype"): if is_integer_dtype(values.dtype): dtype = values.dtype if dtype is not None: if isinstance(dtype, str) and ( dtype.startswith("Int") or dtype.startswith("UInt") ): # Avoid DeprecationWarning from NumPy about np.dtype("Int64") # https://github.com/numpy/numpy/pull/7476 dtype = dtype.lower() if not issubclass(type(dtype), _IntegerDtype): try: dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))] except KeyError as err: raise ValueError(f"invalid dtype specified {dtype}") from err if isinstance(values, IntegerArray): values, mask = values._data, values._mask if dtype is not None: values = values.astype(dtype.numpy_dtype, copy=False) if copy: values = values.copy() mask = mask.copy() return values, mask values = np.array(values, copy=copy) inferred_type = None if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": pass elif inferred_type == "boolean": raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype") elif is_bool_dtype(values) and is_integer_dtype(dtype): values = np.array(values, dtype=int, copy=copy) elif not (is_integer_dtype(values) or is_float_dtype(values)): raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") if values.ndim != 1: raise TypeError("values must be a 1D list-like") if mask is None: mask = libmissing.is_numeric_na(values) else: assert len(mask) == len(values) if mask.ndim != 1: raise TypeError("mask must be a 1D list-like") # infer dtype if needed if dtype is None: dtype = np.dtype("int64") else: dtype = dtype.type # if we are float, let's make sure that we can # safely cast # we copy as need to coerce here if mask.any(): values = values.copy() values[mask] = 1 if inferred_type in ("string", "unicode"): # casts from str are always safe since they raise # a ValueError if the str cannot be parsed into an int values = values.astype(dtype, copy=copy) else: values = safe_cast(values, dtype, copy=False) return values, mask