def fillna(self, value=None, method=None, limit=None): # TODO(#20300) # To avoid converting to object, we re-implement here with the changes # 1. Passing `_data` to func instead of self.astype(object) # 2. Re-boxing output of 1. # #20300 should let us do this kind of logic on ExtensionArray.fillna # and we can use it. if isinstance(value, ABCSeries): value = value._values value, method = validate_fillna_kwargs(value, method) mask = self.isna() if is_array_like(value): if len(value) != len(self): raise ValueError("Length of 'value' does not match. Got ({}) " " expected {}".format(len(value), len(self))) value = value[mask] if mask.any(): if method is not None: func = pad_1d if method == 'pad' else backfill_1d new_values = func(self._data, limit=limit, mask=mask) new_values = type(self)(new_values, freq=self.freq) else: # fill with value new_values = self.copy() new_values[mask] = value else: new_values = self.copy() return new_values
def __setitem__(self, key, value): value = extract_array(value, extract_numpy=True) if isinstance(value, type(self)): # extract_array doesn't extract PandasArray subclasses value = value._ndarray key = check_array_indexer(self, key) scalar_key = lib.is_scalar(key) scalar_value = lib.is_scalar(value) if scalar_key and not scalar_value: raise ValueError("setting an array element with a sequence.") # validate new items if scalar_value: if isna(value): value = StringDtype.na_value elif not isinstance(value, str): raise ValueError( f"Cannot set non-string value '{value}' into a StringArray." ) else: if not is_array_like(value): value = np.asarray(value, dtype=object) if len(value) and not lib.is_string_array(value, skipna=True): raise ValueError("Must provide strings.") super().__setitem__(key, value)
def is_bool_indexer(key: Any) -> bool: """ Check whether `key` is a valid boolean indexer. Parameters ---------- key : Any Only list-likes may be considered boolean indexers. All other types are not considered a boolean indexer. For array-like input, boolean ndarrays or ExtensionArrays with ``_is_boolean`` set are considered boolean indexers. Returns ------- bool Whether `key` is a valid boolean indexer. Raises ------ ValueError When the array is an object-dtype ndarray or ExtensionArray and contains missing values. See Also -------- check_bool_array_indexer : Check that `key` is a valid mask for an array, and convert to an ndarray. """ na_msg = "cannot mask with array containing NA / NaN values" if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( is_array_like(key) and is_extension_array_dtype(key.dtype) ): if key.dtype == np.object_: key = np.asarray(values_from_object(key)) if not lib.is_bool_array(key): if isna(key).any(): raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): # an ndarray with bool-dtype by definition has no missing values. # So we only need to check for NAs in ExtensionArrays if is_extension_array_dtype(key.dtype): if np.any(key.isna()): raise ValueError(na_msg) return True elif isinstance(key, list): try: arr = np.asarray(key) return arr.dtype == np.bool_ and len(arr) == len(key) except TypeError: # pragma: no cover return False return False
def random_state(state: RandomState | None = None): """ Helper function for processing random_state arguments. Parameters ---------- state : int, array-like, BitGenerator, Generator, np.random.RandomState, None. If receives an int, array-like, or BitGenerator, passes to np.random.RandomState() as seed. If receives an np.random RandomState or Generator, just returns that unchanged. If receives `None`, returns np.random. If receives anything else, raises an informative ValueError. .. versionchanged:: 1.1.0 array-like and BitGenerator object now passed to np.random.RandomState() as seed Default None. Returns ------- np.random.RandomState or np.random.Generator. If state is None, returns np.random """ if ( is_integer(state) or is_array_like(state) or isinstance(state, np.random.BitGenerator) ): # error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int, # Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected # "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]], # Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]], # Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]], # Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, # integer[Any]]]]]]], # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, # integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]], # Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool, # int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]], # BitGenerator]" return np.random.RandomState(state) # type: ignore[arg-type] elif isinstance(state, np.random.RandomState): return state elif isinstance(state, np.random.Generator): return state elif state is None: return np.random else: raise ValueError( "random_state must be an integer, array-like, a BitGenerator, Generator, " "a numpy RandomState, or None" )
def is_bool_indexer(key: Any) -> bool: """ Check whether `key` is a valid boolean indexer. Parameters ---------- key : Any Only list-likes may be considered boolean indexers. All other types are not considered a boolean indexer. For array-like input, boolean ndarrays or ExtensionArrays with ``_is_boolean`` set are considered boolean indexers. Returns ------- bool Whether `key` is a valid boolean indexer. Raises ------ ValueError When the array is an object-dtype ndarray or ExtensionArray and contains missing values. See Also -------- check_array_indexer : Check that `key` is a valid array to index, and convert to an ndarray. """ if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( is_array_like(key) and is_extension_array_dtype(key.dtype) ): if key.dtype == np.object_: key = np.asarray(key) if not lib.is_bool_array(key): na_msg = "Cannot mask with non-boolean array containing NA / NaN values" if lib.infer_dtype(key) == "boolean" and isna(key).any(): # Don't raise on e.g. ["A", "B", np.nan], see # test_loc_getitem_list_of_labels_categoricalindex_with_na raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): # check if np.array(key).dtype would be bool if len(key) > 0: if type(key) is not list: # GH#42461 cython will raise TypeError if we pass a subclass key = list(key) return lib.is_bool_list(key) return False
def check_value_size(value, mask: np.ndarray, length: int): """ Validate the size of the values passed to ExtensionArray.fillna. """ if is_array_like(value): if len(value) != length: raise ValueError( f"Length of 'value' does not match. Got ({len(value)}) " f" expected {length}") value = value[mask] return value
def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. Parameters ---------- value : scalar, array-like If a scalar value is passed it is used to fill all missing values. Alternatively, an array-like 'value' can be given. It's expected that the array-like have the same length as 'self'. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap. limit : int, default None If method is specified, this is the maximum number of consecutive NaN values to forward/backward fill. In other words, if there is a gap with more than this number of consecutive NaNs, it will only be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Returns ------- ExtensionArray With NA/NaN filled. """ value, method = validate_fillna_kwargs(value, method) mask = self.isna() if is_array_like(value): if len(value) != len(self): raise ValueError( f"Length of 'value' does not match. Got ({len(value)}) " f"expected {len(self)}" ) value = value[mask] if mask.any(): if method is not None: func = pad_1d if method == "pad" else backfill_1d new_values = func(self.astype(object), limit=limit, mask=mask) new_values = self._from_sequence(new_values, dtype=self.dtype) else: # fill with value new_values = self.copy() new_values[mask] = value else: new_values = self.copy() return new_values
def is_bool_indexer(key): # type: (Any) -> bool """ Check whether `key` is a valid boolean indexer. Parameters ---------- key : Any Only list-likes may be considered boolean indexers. All other types are not considered a boolean indexer. For array-like input, boolean ndarrays or ExtensionArrays with ``_is_boolean`` set are considered boolean indexers. Returns ------- bool Raises ------ ValueError When the array is an object-dtype ndarray or ExtensionArray and contains missing values. """ na_msg = 'cannot index with vector containing NA / NaN values' if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (is_array_like(key) and is_extension_array_dtype(key.dtype))): if key.dtype == np.object_: key = np.asarray(values_from_object(key)) if not lib.is_bool_array(key): if isna(key).any(): raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): # an ndarray with bool-dtype by definition has no missing values. # So we only need to check for NAs in ExtensionArrays if is_extension_array_dtype(key.dtype): if np.any(key.isna()): raise ValueError(na_msg) return True elif isinstance(key, list): try: arr = np.asarray(key) return arr.dtype == np.bool_ and len(arr) == len(key) except TypeError: # pragma: no cover return False return False
def __getitem__(self, arr: Any) -> Any: _logger.debug("RLEArray.__getitem__(arr=%s(...))", type(arr).__name__) if isinstance(arr, tuple): # This is for example called by Pandas as values[:, None] to prepare the data for the cythonized # aggregation. Since we do not want to support the the aggregation over decompression, it is OK to not # implement this. raise NotImplementedError( "__getitem__ does currently only work w/ a single parameter") if is_array_like(arr) or isinstance(arr, list): arr = _normalize_arraylike_indexing(arr, len(self)) if arr.dtype == np.bool_: arr = np.arange(len(self))[arr] else: arr = arr.astype(int) if len(arr) == 0: return RLEArray(data=self._data[[]], positions=self._positions[[]]) arr[arr < 0] += len(self) data, positions = take( data=self._data, positions=self._positions, indices=arr, allow_fill=False, fill_value=self.dtype.na_value, ) return RLEArray(data=data, positions=positions) elif isinstance(arr, slice): data, positions = find_slice(self._data, self._positions, arr) parent_normalized = NormalizedSlice.from_slice( get_len(self._projection.master.positions), self._projection.projection_slice, ) child_normalized = NormalizedSlice.from_slice(len(self), arr) subslice = parent_normalized.project(child_normalized).to_slice() result = RLEArray(data=data, positions=positions) self._projection.master.register_change(result, subslice) return result else: if arr < 0: arr = arr + len(self) return find_single_index(self._data, self._positions, arr)
def random_state(state=None): """ Helper function for processing random_state arguments. Parameters ---------- state : int, array-like, BitGenerator (NumPy>=1.17), np.random.RandomState, None. If receives an int, array-like, or BitGenerator, passes to np.random.RandomState() as seed. If receives an np.random.RandomState object, just returns object. If receives `None`, returns np.random. If receives anything else, raises an informative ValueError. ..versionchanged:: 1.1.0 array-like and BitGenerator (for NumPy>=1.17) object now passed to np.random.RandomState() as seed Default None. Returns ------- np.random.RandomState """ if ( is_integer(state) or is_array_like(state) or (not _np_version_under1p17 and isinstance(state, np.random.BitGenerator)) ): return np.random.RandomState(state) elif isinstance(state, np.random.RandomState): return state elif state is None: return np.random else: raise ValueError( ( "random_state must be an integer, array-like, a BitGenerator, " "a numpy RandomState, or None" ) )
def __init__(self, data, sparse_index=None, index=None, fill_value=None, kind='integer', dtype=None, copy=False): from pandas.core.internals import SingleBlockManager if isinstance(data, SingleBlockManager): data = data.internal_values() if fill_value is None and isinstance(dtype, SparseDtype): fill_value = dtype.fill_value if isinstance(data, (type(self), ABCSparseSeries)): # disable normal inference on dtype, sparse_index, & fill_value if sparse_index is None: sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value if dtype is None: dtype = data.dtype # TODO: make kind=None, and use data.kind? data = data.sp_values # Handle use-provided dtype if isinstance(dtype, compat.string_types): # Two options: dtype='int', regular numpy dtype # or dtype='Sparse[int]', a sparse dtype try: dtype = SparseDtype.construct_from_string(dtype) except TypeError: dtype = pandas_dtype(dtype) if isinstance(dtype, SparseDtype): if fill_value is None: fill_value = dtype.fill_value dtype = dtype.subtype if index is not None and not is_scalar(data): raise Exception("must only pass scalars with an index ") if is_scalar(data): if index is not None: if data is None: data = np.nan if index is not None: npoints = len(index) elif sparse_index is None: npoints = 1 else: npoints = sparse_index.length dtype = infer_dtype_from_scalar(data)[0] data = construct_1d_arraylike_from_scalar(data, npoints, dtype) if dtype is not None: dtype = pandas_dtype(dtype) # TODO: disentangle the fill_value dtype inference from # dtype inference if data is None: # XXX: What should the empty dtype be? Object or float? data = np.array([], dtype=dtype) if not is_array_like(data): try: # probably shared code in sanitize_series from pandas.core.series import _sanitize_array data = _sanitize_array(data, index=None) except ValueError: # NumPy may raise a ValueError on data like [1, []] # we retry with object dtype here. if dtype is None: dtype = object data = np.atleast_1d(np.asarray(data, dtype=dtype)) else: raise if copy: # TODO: avoid double copy when dtype forces cast. data = data.copy() if fill_value is None: fill_value_dtype = data.dtype if dtype is None else dtype if fill_value_dtype is None: fill_value = np.nan else: fill_value = na_value_for_dtype(fill_value_dtype) if isinstance(data, type(self)) and sparse_index is None: sparse_index = data._sparse_index sparse_values = np.asarray(data.sp_values, dtype=dtype) elif sparse_index is None: sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype) else: sparse_values = np.asarray(data, dtype=dtype) if len(sparse_values) != sparse_index.npoints: raise AssertionError( "Non array-like type {type} must " "have the same length as the index".format( type=type(sparse_values))) self._sparse_index = sparse_index self._sparse_values = sparse_values self._dtype = SparseDtype(sparse_values.dtype, fill_value)
def take(self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None): """ Take elements from an array. Parameters ---------- indices : sequence of int Indices to be taken. allow_fill : bool, default False How to handle negative values in `indices`. * False: negative values in `indices` indicate positional indices from the right (the default). This is similar to :func:`numpy.take`. * True: negative values in `indices` indicate missing values. These values are set to `fill_value`. Any other other negative values raise a ``ValueError``. fill_value : any, optional Fill value to use for NA-indices when `allow_fill` is True. This may be ``None``, in which case the default NA value for the type, ``self.dtype.na_value``, is used. For many ExtensionArrays, there will be two representations of `fill_value`: a user-facing "boxed" scalar, and a low-level physical NA value. `fill_value` should be the user-facing version, and the implementation should handle translating that to the physical version for processing the take if necessary. Returns ------- ExtensionArray Raises ------ IndexError When the indices are out of bounds for the array. ValueError When `indices` contains negative values other than ``-1`` and `allow_fill` is True. See Also -------- numpy.take api.extensions.take Notes ----- ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when `indices` is a sequence of values. Additionally, it's called by :meth:`Series.reindex`, or any other method that causes realignment, with a `fill_value`. """ # TODO: Remove once we got rid of the (indices < 0) check if not is_array_like(indices): indices_array = np.asanyarray(indices) else: # error: Incompatible types in assignment (expression has type # "Sequence[int]", variable has type "ndarray") indices_array = indices # type: ignore[assignment] if len(self._data) == 0 and (indices_array >= 0).any(): raise IndexError("cannot do a non-empty take") if indices_array.size > 0 and indices_array.max() >= len(self._data): raise IndexError("out of bounds value in 'indices'.") if allow_fill: fill_mask = indices_array < 0 if fill_mask.any(): validate_indices(indices_array, len(self._data)) # TODO(ARROW-9433): Treat negative indices as NULL indices_array = pa.array(indices_array, mask=fill_mask) result = self._data.take(indices_array) if isna(fill_value): return type(self)(result) # TODO: ArrowNotImplementedError: Function fill_null has no # kernel matching input types (array[string], scalar[string]) result = type(self)(result) result[fill_mask] = fill_value return result # return type(self)(pc.fill_null(result, pa.scalar(fill_value))) else: # Nothing to fill return type(self)(self._data.take(indices)) else: # allow_fill=False # TODO(ARROW-9432): Treat negative indices as indices from the right. if (indices_array < 0).any(): # Don't modify in-place indices_array = np.copy(indices_array) indices_array[indices_array < 0] += len(self._data) return type(self)(self._data.take(indices_array))
def align_method_FRAME( left, right, axis, flex: bool | None = False, level: Level = None ): """ Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. Parameters ---------- left : DataFrame right : Any axis : int, str, or None flex : bool or None, default False Whether this is a flex op, in which case we reindex. None indicates not to check for alignment. level : int or level name, default None Returns ------- left : DataFrame right : Any """ def to_series(right): msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" if axis is not None and left._get_axis_name(axis) == "index": if len(left.index) != len(right): raise ValueError( msg.format(req_len=len(left.index), given_len=len(right)) ) right = left._constructor_sliced(right, index=left.index) else: if len(left.columns) != len(right): raise ValueError( msg.format(req_len=len(left.columns), given_len=len(right)) ) right = left._constructor_sliced(right, index=left.columns) return right if isinstance(right, np.ndarray): if right.ndim == 1: right = to_series(right) elif right.ndim == 2: if right.shape == left.shape: right = left._constructor(right, index=left.index, columns=left.columns) elif right.shape[0] == left.shape[0] and right.shape[1] == 1: # Broadcast across columns right = np.broadcast_to(right, left.shape) right = left._constructor(right, index=left.index, columns=left.columns) elif right.shape[1] == left.shape[1] and right.shape[0] == 1: # Broadcast along rows right = to_series(right[0, :]) else: raise ValueError( "Unable to coerce to DataFrame, shape " f"must be {left.shape}: given {right.shape}" ) elif right.ndim > 2: raise ValueError( "Unable to coerce to Series/DataFrame, " f"dimension must be <= 2: {right.shape}" ) elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)): # GH 36702. Raise when attempting arithmetic with list of array-like. if any(is_array_like(el) for el in right): raise ValueError( f"Unable to coerce list of {type(right[0])} to Series/DataFrame" ) # GH17901 right = to_series(right) if flex is not None and isinstance(right, ABCDataFrame): if not left._indexed_same(right): if flex: left, right = left.align(right, join="outer", level=level, copy=False) else: raise ValueError( "Can only compare identically-labeled DataFrame objects" ) elif isinstance(right, ABCSeries): # axis=1 is default for DataFrame-with-Series op axis = left._get_axis_number(axis) if axis is not None else 1 if not flex: if not left.axes[axis].equals(right.index): warnings.warn( "Automatic reindexing on DataFrame vs Series comparisons " "is deprecated and will raise ValueError in a future version. " "Do `left, right = left.align(right, axis=1, copy=False)` " "before e.g. `left == right`", FutureWarning, stacklevel=5, ) left, right = left.align( right, join="outer", axis=axis, level=level, copy=False ) right = _maybe_align_series_as_frame(left, right, axis) return left, right
def __init__(self, data, sparse_index=None, index=None, fill_value=None, kind='integer', dtype=None, copy=False): from pandas.core.internals import SingleBlockManager if isinstance(data, SingleBlockManager): data = data.internal_values() if fill_value is None and isinstance(dtype, SparseDtype): fill_value = dtype.fill_value if isinstance(data, (type(self), ABCSparseSeries)): # disable normal inference on dtype, sparse_index, & fill_value if sparse_index is None: sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value if dtype is None: dtype = data.dtype # TODO: make kind=None, and use data.kind? data = data.sp_values # Handle use-provided dtype if isinstance(dtype, compat.string_types): # Two options: dtype='int', regular numpy dtype # or dtype='Sparse[int]', a sparse dtype try: dtype = SparseDtype.construct_from_string(dtype) except TypeError: dtype = pandas_dtype(dtype) if isinstance(dtype, SparseDtype): if fill_value is None: fill_value = dtype.fill_value dtype = dtype.subtype if index is not None and not is_scalar(data): raise Exception("must only pass scalars with an index ") if is_scalar(data): if index is not None: if data is None: data = np.nan if index is not None: npoints = len(index) elif sparse_index is None: npoints = 1 else: npoints = sparse_index.length dtype = infer_dtype_from_scalar(data)[0] data = construct_1d_arraylike_from_scalar( data, npoints, dtype ) if dtype is not None: dtype = pandas_dtype(dtype) # TODO: disentangle the fill_value dtype inference from # dtype inference if data is None: # XXX: What should the empty dtype be? Object or float? data = np.array([], dtype=dtype) if not is_array_like(data): try: # probably shared code in sanitize_series from pandas.core.series import _sanitize_array data = _sanitize_array(data, index=None) except ValueError: # NumPy may raise a ValueError on data like [1, []] # we retry with object dtype here. if dtype is None: dtype = object data = np.atleast_1d(np.asarray(data, dtype=dtype)) else: raise if copy: # TODO: avoid double copy when dtype forces cast. data = data.copy() if fill_value is None: fill_value_dtype = data.dtype if dtype is None else dtype if fill_value_dtype is None: fill_value = np.nan else: fill_value = na_value_for_dtype(fill_value_dtype) if isinstance(data, type(self)) and sparse_index is None: sparse_index = data._sparse_index sparse_values = np.asarray(data.sp_values, dtype=dtype) elif sparse_index is None: sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype ) else: sparse_values = np.asarray(data, dtype=dtype) if len(sparse_values) != sparse_index.npoints: raise AssertionError("Non array-like type {type} must " "have the same length as the index" .format(type=type(sparse_values))) self._sparse_index = sparse_index self._sparse_values = sparse_values self._dtype = SparseDtype(sparse_values.dtype, fill_value)
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: """ Check if `indexer` is a valid array indexer for `array`. For a boolean mask, `array` and `indexer` are checked to have the same length. The dtype is validated, and if it is an integer or boolean ExtensionArray, it is checked if there are missing values present, and it is converted to the appropriate numpy array. Other dtypes will raise an error. Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed through as is. .. versionadded:: 1.0.0 Parameters ---------- array : array-like The array that is being indexed (only used for the length). indexer : array-like or list-like The array-like that's used to index. List-like input that is not yet a numpy array or an ExtensionArray is converted to one. Other input types are passed through as is. Returns ------- numpy.ndarray The validated indexer as a numpy array that can be used to index. Raises ------ IndexError When the lengths don't match. ValueError When `indexer` cannot be converted to a numpy ndarray to index (e.g. presence of missing values). See Also -------- api.types.is_bool_dtype : Check if `key` is of boolean dtype. Examples -------- When checking a boolean mask, a boolean ndarray is returned when the arguments are all valid. >>> mask = pd.array([True, False]) >>> arr = pd.array([1, 2]) >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) An IndexError is raised when the lengths don't match. >>> mask = pd.array([True, False, True]) >>> pd.api.indexers.check_array_indexer(arr, mask) Traceback (most recent call last): ... IndexError: Boolean index has wrong length: 3 instead of 2. NA values in a boolean array are treated as False. >>> mask = pd.array([True, pd.NA]) >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) A numpy boolean mask will get passed through (if the length is correct): >>> mask = np.array([True, False]) >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) Similarly for integer indexers, an integer ndarray is returned when it is a valid indexer, otherwise an error is (for integer indexers, a matching length is not required): >>> indexer = pd.array([0, 2], dtype="Int64") >>> arr = pd.array([1, 2, 3]) >>> pd.api.indexers.check_array_indexer(arr, indexer) array([0, 2]) >>> indexer = pd.array([0, pd.NA], dtype="Int64") >>> pd.api.indexers.check_array_indexer(arr, indexer) Traceback (most recent call last): ... ValueError: Cannot index with an integer indexer containing NA values For non-integer/boolean dtypes, an appropriate error is raised: >>> indexer = np.array([0., 2.], dtype="float64") >>> pd.api.indexers.check_array_indexer(arr, indexer) Traceback (most recent call last): ... IndexError: arrays used as indices must be of integer or boolean type """ from pandas.core.construction import array as pd_array # whatever is not an array-like is returned as-is (possible valid array # indexers that are not array-like: integer, slice, Ellipsis, None) # In this context, tuples are not considered as array-like, as they have # a specific meaning in indexing (multi-dimensional indexing) if is_list_like(indexer): if isinstance(indexer, tuple): return indexer else: return indexer # convert list-likes to array if not is_array_like(indexer): indexer = pd_array(indexer) if len(indexer) == 0: # empty list is converted to float array by pd.array indexer = np.array([], dtype=np.intp) dtype = indexer.dtype if is_bool_dtype(dtype): if is_extension_array_dtype(dtype): indexer = indexer.to_numpy(dtype=bool, na_value=False) else: indexer = np.asarray(indexer, dtype=bool) # GH26658 if len(indexer) != len(array): raise IndexError( f"Boolean index has wrong length: " f"{len(indexer)} instead of {len(array)}" ) elif is_integer_dtype(dtype): try: indexer = np.asarray(indexer, dtype=np.intp) except ValueError as err: raise ValueError( "Cannot index with an integer indexer containing NA values" ) from err else: raise IndexError("arrays used as indices must be of integer or boolean type") return indexer
def __init__( self, data, sparse_index=None, index=None, fill_value=None, kind="integer", dtype=None, copy=False, ): if fill_value is None and isinstance(dtype, SparseDtype): fill_value = dtype.fill_value if isinstance(data, type(self)): # disable normal inference on dtype, sparse_index, & fill_value if sparse_index is None: sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value if dtype is None: dtype = data.dtype # TODO: make kind=None, and use data.kind? data = data.sp_values # Handle use-provided dtype if isinstance(dtype, str): # Two options: dtype='int', regular numpy dtype # or dtype='Sparse[int]', a sparse dtype try: dtype = SparseDtype.construct_from_string(dtype) except TypeError: dtype = pandas_dtype(dtype) if isinstance(dtype, SparseDtype): if fill_value is None: fill_value = dtype.fill_value dtype = dtype.subtype if index is not None and not is_scalar(data): raise Exception("must only pass scalars with an index") if is_scalar(data): if index is not None and data is None: data = np.nan if index is not None: npoints = len(index) elif sparse_index is None: npoints = 1 else: npoints = sparse_index.length dtype = infer_dtype_from_scalar(data)[0] data = construct_1d_arraylike_from_scalar(data, npoints, dtype) if dtype is not None: dtype = pandas_dtype(dtype) # TODO: disentangle the fill_value dtype inference from # dtype inference if data is None: # TODO: What should the empty dtype be? Object or float? data = np.array([], dtype=dtype) if not is_array_like(data): try: # probably shared code in sanitize_series data = sanitize_array(data, index=None) except ValueError: # NumPy may raise a ValueError on data like [1, []] # we retry with object dtype here. if dtype is None: dtype = object data = np.atleast_1d(np.asarray(data, dtype=dtype)) else: raise if copy: # TODO: avoid double copy when dtype forces cast. data = data.copy() if fill_value is None: fill_value_dtype = data.dtype if dtype is None else dtype if fill_value_dtype is None: fill_value = np.nan else: fill_value = na_value_for_dtype(fill_value_dtype) if isinstance(data, type(self)) and sparse_index is None: sparse_index = data._sparse_index sparse_values = np.asarray(data.sp_values, dtype=dtype) elif sparse_index is None: data = extract_array(data, extract_numpy=True) if not isinstance(data, np.ndarray): # EA if is_datetime64tz_dtype(data.dtype): warnings.warn( f"Creating SparseArray from {data.dtype} data " "loses timezone information. Cast to object before " "sparse to retain timezone information.", UserWarning, stacklevel=2, ) data = np.asarray(data, dtype="datetime64[ns]") data = np.asarray(data) sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype) else: sparse_values = np.asarray(data, dtype=dtype) if len(sparse_values) != sparse_index.npoints: raise AssertionError( f"Non array-like type {type(sparse_values)} must " "have the same length as the index") self._sparse_index = sparse_index self._sparse_values = sparse_values self._dtype = SparseDtype(sparse_values.dtype, fill_value)