def data_missing(allow_in_pandas, dtype):
    """Length-2 array: a missing value followed by a valid one.

    For an object ``dtype`` the valid element is the tuple ``(1,)``;
    otherwise it is the float ``1.0``.

    ``allow_in_pandas`` is not read in the body; presumably it is requested
    only for its fixture side effect -- confirm against its definition.
    """
    if dtype.numpy_dtype == 'object':
        # For NumPy <1.16, np.array([np.nan, (1,)]) raises
        # ValueError: setting an array element with a sequence.
        if _np_version_under1p16:
            # pytest.skip() raises by itself, so no ``raise`` is needed.
            pytest.skip("Skipping for NumPy <1.16")
        # The input is ragged (scalar next to a tuple). Request object dtype
        # explicitly: newer NumPy refuses to build ragged arrays implicitly.
        return PandasArray(np.array([np.nan, (1, )], dtype=object))
    return PandasArray(np.array([np.nan, 1.0]))
def data_missing_for_sorting(allow_in_pandas, dtype):
    """Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.

    For an object ``dtype`` the items are one-element tuples; otherwise
    they are plain numbers.
    """
    if dtype.numpy_dtype == 'object':
        # Tuples next to a scalar NaN form a ragged sequence. Request object
        # dtype explicitly: newer NumPy refuses to create ragged arrays
        # implicitly, and this keeps the result 1-D with tuple elements.
        return PandasArray(np.array([(1, ), np.nan, (0, )], dtype=object))
    return PandasArray(np.array([1, np.nan, 0]))
def data_for_sorting(allow_in_pandas, dtype):
    """Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C

    For an object ``dtype`` the items are one-element tuples; otherwise
    they are plain integers.
    """
    if dtype.numpy_dtype == 'object':
        # Use an empty tuple for first element, then remove,
        # to disable np.array's shape inference: the differing lengths keep
        # the result a 1-D array of tuples rather than a 2-D array.
        # dtype=object is spelled out because newer NumPy refuses to create
        # ragged arrays implicitly.
        padded = np.array([(), (2, ), (3, ), (1, )], dtype=object)
        return PandasArray(padded[1:])
    return PandasArray(np.array([1, 2, 0]))
def data_missing_for_sorting(allow_in_pandas):
    """Three-element array whose ordering is known in advance.

    The elements are laid out as [B, NA, A], where A < B and NA is a
    missing value.
    """
    # B = 1, NA = NaN, A = 0
    values = np.array([1, np.nan, 0])
    return PandasArray(values)
def data_for_sorting(allow_in_pandas):
    """Three-element array whose ordering is known in advance.

    The elements are laid out as [B, C, A], where A < B < C.
    """
    # B = 1, C = 2, A = 0
    values = np.array([1, 2, 0])
    return PandasArray(values)
def data_for_grouping(allow_in_pandas):
    """Array used by the factorization, grouping, and unique tests.

    The layout is [B, B, NA, NA, A, A, B, C], where A < B < C and NA is
    a missing value.
    """
    low, mid, high = np.arange(3)
    missing = np.nan
    layout = [mid, mid, missing, missing, low, low, mid, high]
    return PandasArray(np.array(layout))
def data_for_grouping(allow_in_pandas, dtype):
    """Data for factorization, grouping, and unique tests.

    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing

    For an object ``dtype`` A, B and C are one-element tuples; otherwise
    they are the integers 0, 1 and 2.
    """
    if dtype.numpy_dtype == 'object':
        a, b, c = (1, ), (2, ), (3, )
        # Tuples mixed with scalar NaN are ragged. Newer NumPy refuses to
        # build ragged arrays unless object dtype is requested explicitly.
        np_dtype = object
    else:
        a, b, c = np.arange(3)
        # None lets NumPy infer the dtype, exactly as when no dtype is given.
        np_dtype = None
    values = np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=np_dtype)
    return PandasArray(values)
def array(self) -> ExtensionArray:
    """
    The ExtensionArray of the data backing this Series or Index.

    .. versionadded:: 0.24.0

    Returns
    -------
    ExtensionArray
        An ExtensionArray of the values stored within. For extension
        types, this is the actual array. For NumPy native types, this
        is a thin (no copy) wrapper around :class:`numpy.ndarray`.

        ``.array`` differs from ``.values``, which may require converting
        the data to a different form.

    See Also
    --------
    Index.to_numpy : Similar method that always returns a NumPy array.
    Series.to_numpy : Similar method that always returns a NumPy array.

    Notes
    -----
    This table lays out the different array types for each extension
    dtype within pandas.

    ================== =============================
    dtype              array type
    ================== =============================
    category           Categorical
    period             PeriodArray
    interval           IntervalArray
    IntegerNA          IntegerArray
    datetime64[ns, tz] DatetimeArray
    ================== =============================

    For any 3rd-party extension types, the array type will be an
    ExtensionArray.

    Values for which ``is_datetime64_ns_dtype`` or
    ``is_timedelta64_ns_dtype`` holds are wrapped in a ``DatetimeArray``
    or ``TimedeltaArray`` respectively.

    For all remaining dtypes ``.array`` will be a ``PandasArray``
    wrapping the actual ndarray stored within. If you absolutely need
    a NumPy array (possibly with copying / coercing data), then use
    :meth:`Series.to_numpy` instead.

    Examples
    --------
    For regular NumPy types like int, and float, a PandasArray
    is returned.

    >>> pd.Series([1, 2, 3]).array
    <PandasArray>
    [1, 2, 3]
    Length: 3, dtype: int64

    For extension types, like Categorical, the actual ExtensionArray
    is returned

    >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
    >>> ser.array
    [a, b, a]
    Categories (2, object): [a, b]
    """
    # As a mixin, we depend on the mixing class having _values.
    # Special mixin syntax may be developed in the future:
    # https://github.com/python/typing/issues/246
    values = self._values  # type: ignore
    values_dtype = values.dtype

    if is_datetime64_ns_dtype(values_dtype):
        from pandas.arrays import DatetimeArray
        return DatetimeArray(values)

    if is_timedelta64_ns_dtype(values_dtype):
        from pandas.arrays import TimedeltaArray
        return TimedeltaArray(values)

    if is_extension_array_dtype(values_dtype):
        # Already an extension array: hand it back untouched.
        return values

    # Everything else is wrapped in PandasArray.
    from pandas.core.arrays.numpy_ import PandasArray
    return PandasArray(values)
def data(allow_in_pandas, dtype):
    """Length-100 array of valid values for the requested ``dtype``.

    For an object ``dtype`` the elements are the one-element tuples
    ``(0,)`` through ``(99,)``, obtained via ``Series.array``. Otherwise
    they are the numbers 1 through 100 created with ``dtype._dtype``.
    """
    if dtype.numpy_dtype == 'object':
        tuples = [(i, ) for i in range(100)]
        return pd.Series(tuples).array
    numbers = np.arange(1, 101, dtype=dtype._dtype)
    return PandasArray(numbers)
def data_missing(allow_in_pandas):
    """Length-2 array: a missing value (NaN) followed by the valid 1.0."""
    values = np.array([np.nan, 1.0])
    return PandasArray(values)
def data(allow_in_pandas, dtype):
    """Length-100 array holding 1 through 100, created with ``dtype._dtype``."""
    numbers = np.arange(1, 101, dtype=dtype._dtype)
    return PandasArray(numbers)
def data_missing(allow_in_pandas, dtype):
    """Length-2 array: a missing value followed by a valid one.

    For an object ``dtype`` the valid element is the tuple ``(1,)`` and the
    array is built with object dtype; otherwise the valid element is the
    float ``1.0``.
    """
    if dtype.numpy_dtype == "object":
        values = np.array([np.nan, (1, )], dtype=object)
    else:
        values = np.array([np.nan, 1.0])
    return PandasArray(values)
def array(self):
    # type: () -> ExtensionArray
    """
    The ExtensionArray of the data backing this Series or Index.

    .. versionadded:: 0.24.0

    Returns
    -------
    array : ExtensionArray
        An ExtensionArray of the values stored within. For extension
        types, this is the actual array. For NumPy native types, this
        is a thin (no copy) wrapper around :class:`numpy.ndarray`.

        ``.array`` differs from ``.values``, which may require converting
        the data to a different form.

    See Also
    --------
    Index.to_numpy : Similar method that always returns a NumPy array.
    Series.to_numpy : Similar method that always returns a NumPy array.

    Notes
    -----
    This table lays out the different array types for each extension
    dtype within pandas.

    ================== =============================
    dtype              array type
    ================== =============================
    category           Categorical
    period             PeriodArray
    interval           IntervalArray
    IntegerNA          IntegerArray
    datetime64[ns, tz] DatetimeArray
    ================== =============================

    For any 3rd-party extension types, the array type will be an
    ExtensionArray.

    For all remaining dtypes ``.array`` will be a ``PandasArray``
    wrapping the actual ndarray stored within. If you absolutely need
    a NumPy array (possibly with copying / coercing data), then use
    :meth:`Series.to_numpy` instead.

    Examples
    --------
    For regular NumPy types like int, and float, a PandasArray
    is returned.

    >>> pd.Series([1, 2, 3]).array
    <PandasArray>
    [1, 2, 3]
    Length: 3, dtype: int64

    For extension types, like Categorical, the actual ExtensionArray
    is returned

    >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
    >>> ser.array
    [a, b, a]
    Categories (2, object): [a, b]
    """
    values = self._values
    values_dtype = values.dtype

    # TODO(DatetimeArray): remove the second clause.
    if (is_extension_array_dtype(values_dtype)
            or is_datetime64tz_dtype(values_dtype)):
        # Extension-backed (or tz-aware datetime) data is returned as-is.
        return values

    # Everything else is wrapped in PandasArray.
    from pandas.core.arrays.numpy_ import PandasArray
    return PandasArray(values)