def _check_promote(
    dtype,
    fill_value,
    boxed,
    box_dtype,
    expected_dtype,
    exp_val_for_scalar=None,
    exp_val_for_array=None,
):
    """
    Call maybe_promote with a scalar or a boxed fill value and check the result.

    Parameters
    ----------
    dtype : dtype
        First argument handed to maybe_promote.
    fill_value : scalar
        Second argument handed to maybe_promote; passed as-is, or wrapped
        in a one-element array when `boxed` is truthy.
    boxed : Boolean
        Whether to wrap `fill_value` in an np.array of dtype `box_dtype`
        before calling maybe_promote.
    box_dtype : dtype
        dtype used for the wrapping array (only relevant when boxed).
    expected_dtype : dtype
        dtype that maybe_promote must return, for both calling modes.
    exp_val_for_scalar : scalar
        Fill value that maybe_promote must return in the scalar case.
    exp_val_for_array : scalar
        Fill value that maybe_promote must return in the boxed case.
    """
    assert is_scalar(fill_value)

    # Select the argument and the matching expectation first, so that a
    # single maybe_promote call serves both calling modes.
    if boxed:
        candidate = np.array([fill_value], dtype=box_dtype)
        wanted_fill_value = exp_val_for_array
    else:
        candidate = fill_value
        wanted_fill_value = exp_val_for_scalar

    result_dtype, result_fill_value = maybe_promote(dtype, candidate)

    assert result_dtype == expected_dtype
    _assert_match(result_fill_value, wanted_fill_value)
def _take_preprocess_indexer_and_fill_value(
    arr: np.ndarray,
    indexer: np.ndarray,
    out: np.ndarray | None,
    fill_value,
    allow_fill: bool,
):
    """
    Work out the result dtype, fill value and mask information for a take.

    Returns
    -------
    tuple
        ``(indexer, dtype, fill_value, mask_info)`` where ``indexer`` has
        been passed through ``ensure_platform_int`` and ``mask_info`` is
        ``None``, ``(None, False)`` or ``(mask, needs_masking)``.

    Raises
    ------
    TypeError
        If filling requires a promoted dtype and ``out`` is given with a
        different dtype.
    """
    mask_info = None

    if allow_fill:
        # Type-only promotion check comes first: it is cheaper than
        # building a mask from the indexer.
        dtype, fill_value = maybe_promote(arr.dtype, fill_value)
        if dtype != arr.dtype:
            # Promotion only matters if the indexer really contains -1.
            mask = indexer == -1
            needs_masking = mask.any()
            mask_info = mask, needs_masking
            if not needs_masking:
                # Nothing to fill: go back to the original dtype and use a
                # dummy fill value that can safely be cast to it.
                dtype, fill_value = arr.dtype, arr.dtype.type()
            elif out is not None and out.dtype != dtype:
                raise TypeError("Incompatible type for fill_value")
    else:
        dtype, fill_value = arr.dtype, arr.dtype.type()
        mask_info = None, False

    indexer = ensure_platform_int(indexer)
    return indexer, dtype, fill_value, mask_info
def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None):
    """
    Call maybe_promote with a scalar fill value and check what it returns.

    Parameters
    ----------
    dtype : dtype
        First argument handed to maybe_promote.
    fill_value : scalar
        Second argument handed to maybe_promote (must be a scalar).
    expected_dtype : dtype
        dtype that maybe_promote must return.
    exp_val_for_scalar : scalar
        Fill value (potentially upcast) that maybe_promote must return.
    """
    assert is_scalar(fill_value)

    promoted = maybe_promote(dtype, fill_value)
    result_dtype, result_fill_value = promoted

    assert result_dtype == expected_dtype
    _assert_match(result_fill_value, exp_val_for_scalar)
def _take_preprocess_indexer_and_fill_value( arr: np.ndarray, indexer: npt.NDArray[np.intp], fill_value, allow_fill: bool, ): mask_info = None if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False else: # check for promotion based on types only (do this first because # it's faster than computing a mask) dtype, fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype: # check if promotion is actually required based on indexer mask = indexer == -1 needs_masking = mask.any() mask_info = mask, needs_masking if not needs_masking: # if not, then depromote, set fill_value to dummy # (it won't be used but we don't want the cython code # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() return dtype, fill_value, mask_info
def get_new_values(self):
    """
    Build the 2-D values array and the matching boolean mask.

    Returns
    -------
    tuple
        ``(new_values, new_mask)``, both of shape
        ``(length, width * stride)`` where ``length, width`` come from
        ``self.full_shape`` and ``stride`` is ``self.values.shape[1]``.
    """
    values = self.values

    length, width = self.full_shape
    stride = values.shape[1]
    result_shape = (length, width * stride)

    mask = self.mask
    mask_all = mask.all()

    # Fast path: the mask is all True and there is data, so a plain
    # reshape is enough.
    if mask_all and len(values):
        cube = self.sorted_values.reshape(length, width, stride)
        new_values = cube.swapaxes(1, 2).reshape(result_shape)
        return new_values, np.ones(result_shape, dtype=bool)

    if mask_all:
        # all-True mask: the existing dtype can be kept
        dtype = values.dtype
        new_values = np.empty(result_shape, dtype=dtype)
    else:
        dtype, fill_value = maybe_promote(values.dtype, self.fill_value)
        new_values = np.empty(result_shape, dtype=dtype)
        new_values.fill(fill_value)

    new_mask = np.zeros(result_shape, dtype=bool)
    name = np.dtype(dtype).name
    sorted_values = self.sorted_values

    # Convert to a basic dtype, possibly coercing the input to the output
    # dtype (e.g. ints -> floats).
    as_i8 = needs_i8_conversion(values)
    if as_i8:
        sorted_values = sorted_values.view("i8")
        new_values = new_values.view("i8")
    elif is_bool_dtype(values):
        sorted_values = sorted_values.astype("object")
        new_values = new_values.astype("object")
    else:
        sorted_values = sorted_values.astype(name, copy=False)

    # fill in the values and the mask
    libreshape.unstack(
        sorted_values,
        mask.view("u1"),
        stride,
        length,
        width,
        new_values,
        new_mask.view("u1"),
    )

    # view the i8 data as the original dtype again
    if as_i8:
        new_values = new_values.view(values.dtype)

    return new_values, new_mask
def take_2d_multi(
    arr: np.ndarray,
    indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
    fill_value=np.nan,
) -> np.ndarray:
    """
    Specialized Cython take which sets NaN values in one pass.

    Parameters
    ----------
    arr : np.ndarray
        Array to take from.
    indexer : tuple of two ndarrays
        Row and column indexers.
    fill_value : any, default np.nan
        Value handed to ``maybe_promote`` and then to the take routine.

    Returns
    -------
    np.ndarray
        Newly allocated array of shape ``(len(rows), len(cols))``.
    """
    # This is only called from one place in DataFrame._reindex_multi,
    # so we know indexer is well-behaved.
    assert indexer is not None
    assert indexer[0] is not None
    assert indexer[1] is not None

    rows, cols = indexer
    rows = ensure_platform_int(rows)
    cols = ensure_platform_int(cols)
    indexer = rows, cols

    mask_info = None

    # Type-only promotion check first; it is cheaper than computing masks.
    dtype, fill_value = maybe_promote(arr.dtype, fill_value)
    if dtype != arr.dtype:
        # Promotion is only really required if -1 occurs in an indexer.
        rows_missing = rows == -1
        cols_missing = cols == -1
        any_row_missing = rows_missing.any()
        any_col_missing = cols_missing.any()
        mask_info = (rows_missing, cols_missing), (any_row_missing, any_col_missing)

        if not (any_row_missing or any_col_missing):
            # Depromote; the dummy fill value is never used but must be
            # castable to the dtype.
            dtype, fill_value = arr.dtype, arr.dtype.type()

    # From here on, dtype can hold both the arr values and the fill_value.
    out = np.empty((len(rows), len(cols)), dtype=dtype)

    out_name = out.dtype.name
    func = _take_2d_multi_dict.get((arr.dtype.name, out_name))
    if func is None and arr.dtype != out.dtype:
        func = _take_2d_multi_dict.get((out_name, out_name))
        if func is not None:
            func = _convert_wrapper(func, out.dtype)

    if func is None:
        # test_reindex_multi
        _take_2d_multi_object(
            arr, indexer, out, fill_value=fill_value, mask_info=mask_info
        )
    else:
        func(arr, indexer, out=out, fill_value=fill_value)

    return out
def dtype(self):
    """
    Return the dtype of the block, promoted when filling is needed.

    Raises
    ------
    AssertionError
        If ``self.block`` is None.
    """
    if self.block is None:
        raise AssertionError("Block is None, no dtype")

    if self.needs_filling:
        # Filling may require a wider dtype than the block's own.
        promoted = maybe_promote(self.block.dtype, self.block.fill_value)
        return get_dtype(promoted[0])

    return self.block.dtype
def dtype(self):
    """
    Return the dtype of the block, promoted when filling is needed.

    Raises
    ------
    AssertionError
        If ``self.block`` is None.
    """
    block = self.block
    if block is None:
        raise AssertionError("Block is None, no dtype")

    if not self.needs_filling:
        return block.dtype

    # Filling may require a wider dtype than the block's own; only the
    # dtype half of maybe_promote's result is needed here.
    promoted_dtype = maybe_promote(block.dtype, block.fill_value)[0]
    return _get_dtype(promoted_dtype)
def test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim):
    """Check maybe_promote gives the same answer for a `dim`-dimensional array as for a 1-D one."""
    dtype = np.dtype(any_numpy_dtype_reduced)

    # Start from a 0-dim array (casts "1" to the dtype) and prepend `dim`
    # axes of length one.
    fill_array = np.array(1, dtype=dtype).reshape((1,) * dim)

    # reference: the 1-dimensional case
    one_dim_array = np.array([1], dtype=dtype)
    expected_dtype, expected_missing_value = maybe_promote(dtype, one_dim_array)

    result_dtype, result_missing_value = maybe_promote(dtype, fill_array)

    assert result_dtype == expected_dtype
    _assert_match(result_missing_value, expected_missing_value)
def fill_value(self, value):
    """
    Set the fill value, refusing anything that would change the dtype.

    Raises
    ------
    ValueError
        If `value` is not a scalar, or if storing it would require
        promoting ``self.dtype``.
    """
    if not is_scalar(value):
        raise ValueError('fill_value must be a scalar')

    # A value that triggers type promotion is rejected.
    new_dtype, promoted_value = maybe_promote(self.dtype, value)
    if not is_dtype_equal(self.dtype, new_dtype):
        msg = 'unable to set fill_value {0} to {1} dtype'
        raise ValueError(msg.format(value, self.dtype))

    self._fill_value = promoted_value
def fill_value(self, value):
    """
    Set the fill value, refusing anything that would change the dtype.

    Raises
    ------
    ValueError
        If `value` is not a scalar, or if storing it would require
        promoting ``self.dtype``.
    """
    if not is_scalar(value):
        raise ValueError('fill_value must be a scalar')

    # A value that triggers type promotion is rejected.
    new_dtype, promoted_value = maybe_promote(self.dtype, value)
    dtype_unchanged = is_dtype_equal(self.dtype, new_dtype)
    if dtype_unchanged:
        self._fill_value = promoted_value
        return

    msg = 'unable to set fill_value {fill} to {dtype} dtype'
    raise ValueError(msg.format(fill=value, dtype=self.dtype))
def test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim):
    """
    Check maybe_promote on array fill values of varying dimension.

    For non-object dtypes an array fill value must raise; for object dtype
    the `dim`-dimensional result must match the 1-dimensional one.
    """
    dtype = np.dtype(any_numpy_dtype_reduced)

    # Start from a 0-dim array (casts "1" to the dtype) and prepend `dim`
    # axes of length one.
    fill_array = np.array(1, dtype=dtype).reshape((1,) * dim)

    if dtype != object:
        # Both the 1-dimensional case and the expanded array are rejected.
        for candidate in (np.array([1], dtype=dtype), fill_array):
            with pytest.raises(ValueError, match="fill_value must be a scalar"):
                maybe_promote(dtype, candidate)
        return

    expected_dtype, expected_missing_value = maybe_promote(
        dtype, np.array([1], dtype=dtype)
    )
    result_dtype, result_missing_value = maybe_promote(dtype, fill_array)

    assert result_dtype == expected_dtype
    _assert_match(result_missing_value, expected_missing_value)
def _take_preprocess_indexer_and_fill_value( arr: np.ndarray, indexer: Optional[np.ndarray], axis: int, out: Optional[np.ndarray], fill_value, allow_fill: bool, ): mask_info = None if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() else: indexer = ensure_int64(indexer, copy=False) if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False else: # check for promotion based on types only (do this first because # it's faster than computing a mask) dtype, fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer mask = indexer == -1 # error: Item "bool" of "Union[Any, bool]" has no attribute "any" # [union-attr] needs_masking = mask.any() # type: ignore[union-attr] # error: Incompatible types in assignment (expression has type # "Tuple[Union[Any, bool], Any]", variable has type # "Optional[Tuple[None, bool]]") mask_info = mask, needs_masking # type: ignore[assignment] if needs_masking: if out is not None and out.dtype != dtype: raise TypeError("Incompatible type for fill_value") else: # if not, then depromote, set fill_value to dummy # (it won't be used but we don't want the cython code # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() return indexer, dtype, fill_value, mask_info
def get_new_values(self, values, fill_value=None):
    """
    Build the 2-D values array and the matching boolean mask for `values`.

    Parameters
    ----------
    values : array
        1-D input is first expanded to a single column.
    fill_value : scalar, optional
        Handed to ``maybe_promote`` when the mask is not all True.

    Returns
    -------
    tuple
        ``(new_values, new_mask)``, both of shape
        ``(length, width * stride)`` where ``length, width`` come from
        ``self.full_shape`` and ``stride`` is ``values.shape[1]``.
    """
    if values.ndim == 1:
        values = values[:, np.newaxis]

    sorted_values = self._make_sorted_values(values)

    length, width = self.full_shape
    stride = values.shape[1]
    result_shape = (length, width * stride)

    mask = self.mask
    mask_all = self.mask_all

    # Fast path: the mask is all True and there is data, so a plain
    # reshape is enough.
    if mask_all and len(values):
        # TODO: Under what circumstances can we rely on sorted_values
        #  matching values?  When that holds, we can slice instead
        #  of take (in particular for EAs)
        cube = sorted_values.reshape(length, width, stride)
        new_values = cube.swapaxes(1, 2).reshape(result_shape)
        return new_values, np.ones(result_shape, dtype=bool)

    if mask_all:
        # all-True mask: the existing dtype can be kept
        dtype = values.dtype
        new_values = np.empty(result_shape, dtype=dtype)
        name = np.dtype(dtype).name
    else:
        dtype, fill_value = maybe_promote(values.dtype, fill_value)
        if isinstance(dtype, ExtensionDtype):
            # GH#41875
            array_type = dtype.construct_array_type()
            new_values = array_type._empty(result_shape, dtype=dtype)
            new_values[:] = fill_value
            name = dtype.name
        else:
            new_values = np.empty(result_shape, dtype=dtype)
            new_values.fill(fill_value)
            name = np.dtype(dtype).name

    new_mask = np.zeros(result_shape, dtype=bool)

    # Convert to a basic dtype, possibly coercing the input to the output
    # dtype (e.g. ints -> floats).
    as_i8 = needs_i8_conversion(values.dtype)
    if as_i8:
        sorted_values = sorted_values.view("i8")
        new_values = new_values.view("i8")
    elif is_bool_dtype(values.dtype):
        sorted_values = sorted_values.astype("object")
        new_values = new_values.astype("object")
    else:
        sorted_values = sorted_values.astype(name, copy=False)

    # fill in the values and the mask
    libreshape.unstack(
        sorted_values,
        mask.view("u1"),
        stride,
        length,
        width,
        new_values,
        new_mask.view("u1"),
    )

    if as_i8:
        # view as datetime64 so we can wrap in DatetimeArray and use
        # DTA's view method
        new_values = new_values.view("M8[ns]")
        new_values = ensure_wrapped_if_datetimelike(new_values)
        new_values = new_values.view(values.dtype)

    return new_values, new_mask
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Note: this function assumes that the indexer is a valid(ated) indexer with
    no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take, subarrays corresponding to -1 value
        indices are filled with fill_value
    axis : int, default 0
        Axis to take from
    fill_value : any, default np.nan
        Fill value to replace -1 values with
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)
    elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM":
        promoted_dtype, fill_value = maybe_promote(arr.dtype, fill_value)
        if arr.dtype != promoted_dtype:
            # EA.take is strict about returning a new object of the same type
            # so for that case cast upfront
            arr = arr.astype(promoted_dtype)

    if isinstance(arr, np.ndarray):
        return _take_nd_ndarray(np.asarray(arr), indexer, axis, fill_value, allow_fill)

    # i.e. ExtensionArray; this also catches DatetimeArray, TimedeltaArray
    if is_1d_only_ea_obj(arr):
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    # i.e. DatetimeArray, TimedeltaArray: their take accepts an axis
    arr = cast("NDArrayBackedExtensionArray", arr)
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis)
def _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype,
                   exp_val_for_scalar=None, exp_val_for_array=None):
    """
    Call maybe_promote with a scalar or a boxed fill value and check the result.

    Parameters
    ----------
    dtype : dtype
        First argument handed to maybe_promote.
    fill_value : scalar
        Second argument handed to maybe_promote; passed as-is, or wrapped
        in a one-element array when `boxed` is truthy.
    boxed : Boolean
        Whether to wrap `fill_value` in an np.array of dtype `box_dtype`
        before calling maybe_promote.
    box_dtype : dtype
        dtype used for the wrapping array (only relevant when boxed).
    expected_dtype : dtype
        dtype that maybe_promote must return, for both calling modes.
    exp_val_for_scalar : scalar
        Fill value that maybe_promote must return in the scalar case.
    exp_val_for_array : scalar
        Fill value that maybe_promote must return in the boxed case.
    """
    assert is_scalar(fill_value)

    # Select the argument and the matching expectation first, so that a
    # single maybe_promote call serves both calling modes.
    if boxed:
        candidate = np.array([fill_value], dtype=box_dtype)
        expected_fill_value = exp_val_for_array
    else:
        candidate = fill_value
        expected_fill_value = exp_val_for_scalar

    result_dtype, result_fill_value = maybe_promote(dtype, candidate)

    _safe_dtype_assert(result_dtype, expected_dtype)

    # Only equality is compared; an additional check that the two values
    # have the same type is disabled due to too many xfails (GH 23982/25425).
    match_value = result_fill_value == expected_fill_value

    # None == None and iNaT == iNaT are covered by match_value, but
    # np.nan != np.nan and pd.NaT != pd.NaT, so those go by identity.
    both_nan = result_fill_value is np.nan and expected_fill_value is np.nan
    both_nat = result_fill_value is NaT and expected_fill_value is NaT
    match_missing = both_nan or both_nat

    assert match_value or match_missing
def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray:
    """
    Return a new ndarray, try to preserve dtype if possible.

    Parameters
    ----------
    values : np.ndarray
        Array to put the new values into.
    mask : np.ndarray[bool]
        Applies to both sides (array like).
    new : `new values` either scalar or an array like aligned with `values`

    Returns
    -------
    values : ndarray with updated values
        this *may* be a copy of the original

    See Also
    --------
    ndarray.putmask
    """
    # we cannot use np.asarray() here as we cannot have conversions
    # that numpy does when numeric are mixed with strings

    # n should be the length of the mask or a scalar here
    if not is_list_like(new):
        new = np.repeat(new, len(mask))

    # see if we are only masking values that if putted
    # will work in the current dtype
    try:
        nn = new[mask]
    except TypeError:
        # TypeError: only integer scalar arrays can be converted to a scalar index
        pass
    else:
        # The in-dtype shortcut is attempted only when
        #  - values can hold nulls if nn contains them, and
        #  - both sides are integer/float (only those are compared).
        if (
            isna_compat(values, nn[0])
            and (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype))
            and (is_float_dtype(values.dtype) or is_integer_dtype(values.dtype))
        ):
            # np.ComplexWarning left the top-level namespace in NumPy 2.0;
            # look it up in numpy.exceptions when that module exists and
            # fall back to the old location otherwise.
            complex_warning = getattr(np, "exceptions", np).ComplexWarning

            # we ignore ComplexWarning here
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("ignore", complex_warning)
                nn_at = nn.astype(values.dtype)

            # The cast is lossless only if every element round-trips.
            comp = nn == nn_at
            if is_list_like(comp) and comp.all():
                nv = values.copy()
                nv[mask] = nn_at
                return nv

    new = np.asarray(new)

    if values.dtype.kind != new.dtype.kind:
        # change the dtype if needed
        dtype, _ = maybe_promote(new.dtype)
        values = values.astype(dtype)

    # same kind (or after the cast above): preserves dtype if possible
    return _putmask_preserve(values, new, mask)
def get_new_values(self):
    """
    Build the 2-D values array and the matching boolean mask.

    Returns
    -------
    tuple
        ``(new_values, new_mask)``, both of shape
        ``(length, width * stride)`` where ``length, width`` come from
        ``self.full_shape`` and ``stride`` is ``self.values.shape[1]``.
    """
    values = self.values

    length, width = self.full_shape
    stride = values.shape[1]
    result_shape = (length, width * stride)

    mask = self.mask
    mask_all = mask.all()

    # Fast path: the mask is all True and there is data, so a plain
    # reshape is enough.
    if mask_all and len(values):
        cube = self.sorted_values.reshape(length, width, stride)
        new_values = cube.swapaxes(1, 2).reshape(result_shape)
        return new_values, np.ones(result_shape, dtype=bool)

    if mask_all:
        # all-True mask: the existing dtype can be kept
        dtype = values.dtype
        new_values = np.empty(result_shape, dtype=dtype)
    else:
        dtype, fill_value = maybe_promote(values.dtype, self.fill_value)
        new_values = np.empty(result_shape, dtype=dtype)
        new_values.fill(fill_value)

    new_mask = np.zeros(result_shape, dtype=bool)
    name = np.dtype(dtype).name
    sorted_values = self.sorted_values

    # Convert to a basic dtype, possibly coercing the input to the output
    # dtype (e.g. ints -> floats); `name` selects the unstack routine.
    as_i8 = needs_i8_conversion(values)
    if as_i8:
        sorted_values = sorted_values.view('i8')
        new_values = new_values.view('i8')
        name = 'int64'
    elif is_bool_dtype(values):
        sorted_values = sorted_values.astype('object')
        new_values = new_values.astype('object')
        name = 'object'
    else:
        sorted_values = sorted_values.astype(name, copy=False)

    # fill in the values and the mask with the dtype-specific routine
    unstack_func = getattr(_reshape, "unstack_{name}".format(name=name))
    unstack_func(
        sorted_values,
        mask.view('u1'),
        stride,
        length,
        width,
        new_values,
        new_mask.view('u1'),
    )

    # view the i8 data as the original dtype again
    if as_i8:
        new_values = new_values.view(values.dtype)

    return new_values, new_mask
def _check_promote(
    dtype,
    fill_value,
    boxed,
    box_dtype,
    expected_dtype,
    exp_val_for_scalar=None,
    exp_val_for_array=None,
):
    """
    Call maybe_promote with a scalar or a boxed fill value and check the result.

    Parameters
    ----------
    dtype : dtype
        First argument handed to maybe_promote.
    fill_value : scalar
        Second argument handed to maybe_promote; passed as-is, or wrapped
        in a one-element array when `boxed` is truthy.
    boxed : Boolean
        Whether to wrap `fill_value` in an np.array of dtype `box_dtype`
        before calling maybe_promote.
    box_dtype : dtype
        dtype used for the wrapping array (only relevant when boxed).
    expected_dtype : dtype
        dtype that maybe_promote must return, for both calling modes.
    exp_val_for_scalar : scalar
        Fill value that maybe_promote must return in the scalar case.
    exp_val_for_array : scalar
        Fill value that maybe_promote must return in the boxed case.
    """
    assert is_scalar(fill_value)

    # Select the argument and the matching expectation first, so that a
    # single maybe_promote call serves both calling modes.
    if boxed:
        candidate = np.array([fill_value], dtype=box_dtype)
        expected_fill_value = exp_val_for_array
    else:
        candidate = fill_value
        expected_fill_value = exp_val_for_scalar

    result_dtype, result_fill_value = maybe_promote(dtype, candidate)

    _safe_dtype_assert(result_dtype, expected_dtype)

    # GH#23982/25425 require the same type in addition to equality/NA-ness
    assert type(result_fill_value) == type(expected_fill_value)

    match_value = result_fill_value == expected_fill_value

    # The type check above ensures both are the _same_ kind of NA value.
    # None == None and iNaT == iNaT are covered by match_value, but
    # np.nan != np.nan and pd.NaT != pd.NaT, hence the isna comparison.
    match_missing = isna(result_fill_value) and isna(expected_fill_value)

    assert match_value or match_missing